blob: 88f956c34fedb0cf9b58e618ccb4b8c0e427a8c3 [file] [log] [blame]
/*
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */
5
/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN. This version also has lots of
 * cleanups preparing for ubd-many.
 * James McMechan
 */
19
/* Block major used for the real (non-fake) ubd disks. */
#define MAJOR_NR UBD_MAJOR
/* log2 of the number of minors given to each unit (see ubd_new_disk). */
#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
52#include "2_5compat.h"
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Operation requested from do_io(). */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One I/O request, filled in by prepare_request() and handed to do_io()
 * either directly or by writing it to the I/O thread's descriptor.
 */
struct io_thread_req {
	/* UBD_READ or UBD_WRITE */
	enum ubd_req op;
	/* [0]: backing file when the device has one, else the device file;
	 * [1]: the device file itself */
	int fds[2];
	/* data start offsets associated with fds[0] and fds[1] */
	unsigned long offsets[2];
	/* byte offset of the transfer on the device */
	unsigned long long offset;
	/* transfer length in bytes */
	unsigned long length;
	/* data buffer taken from the block request */
	char *buffer;
	/* bytes per sector */
	int sectorsize;
	/* one bit per sector of this request, filled in for COW devices */
	unsigned long sector_mask;
	/* file offset at which bitmap_words go, or -1 for no bitmap update */
	unsigned long long cow_offset;
	/* the two COW bitmap words to write back */
	unsigned long bitmap_words[2];
	/* non-zero makes the block request complete as failed */
	int error;
};
73
/* Forward declarations of the file helpers used by the driver below. */
extern int open_ubd_file(char *file, struct openflags *openflags,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
85
/* Return 1 if bit number @bit is set in the byte array @data, else 0. */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] >> shift) & 1;
}
96
/* Set bit number @bit in the byte array @data. */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	unsigned char *byte = &data[bit / width];

	*byte |= (1 << (bit % width));
}
107/*End stuff from ubd_user.h*/
108
109#define DRIVER_NAME "uml-blkdev"
110
111static DEFINE_SPINLOCK(ubd_io_lock);
112static DEFINE_SPINLOCK(ubd_lock);
113
114static void (*do_ubd)(void);
115
116static int ubd_open(struct inode * inode, struct file * filp);
117static int ubd_release(struct inode * inode, struct file * file);
118static int ubd_ioctl(struct inode * inode, struct file * file,
119 unsigned int cmd, unsigned long arg);
120
121#define MAX_DEV (8)
122
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static struct block_device_operations ubd_blops = {
124 .owner = THIS_MODULE,
125 .open = ubd_open,
126 .release = ubd_release,
127 .ioctl = ubd_ioctl,
128};
129
130/* Protected by the queue_lock */
131static request_queue_t *ubd_queue;
132
133/* Protected by ubd_lock */
134static int fake_major = MAJOR_NR;
135
136static struct gendisk *ubd_gendisk[MAX_DEV];
137static struct gendisk *fake_gendisk[MAX_DEV];
138
139#ifdef CONFIG_BLK_DEV_UBD_SYNC
140#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
141 .cl = 1 })
142#else
143#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
144 .cl = 1 })
145#endif
146
147/* Not protected - changed only in ubd_setup_common and then only to
148 * to enable O_SYNC.
149 */
150static struct openflags global_openflags = OPEN_FLAGS;
151
/* Copy-on-write state of one device. */
struct cow {
	/* This is the backing file, actually */
	char *file;
	/* descriptor of the backing file while the device is open */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed while open */
	unsigned long *bitmap;
	/* size of the bitmap in bytes */
	unsigned long bitmap_len;
	/* offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* offset of the data area inside the COW file */
	int data_offset;
};
161
162struct ubd {
163 char *file;
164 int count;
165 int fd;
166 __u64 size;
167 struct openflags boot_openflags;
168 struct openflags openflags;
169 int no_cow;
170 struct cow cow;
171 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172};
173
174#define DEFAULT_COW { \
175 .file = NULL, \
176 .fd = -1, \
177 .bitmap = NULL, \
178 .bitmap_offset = 0, \
179 .data_offset = 0, \
180}
181
182#define DEFAULT_UBD { \
183 .file = NULL, \
184 .count = 0, \
185 .fd = -1, \
186 .size = -1, \
187 .boot_openflags = OPEN_FLAGS, \
188 .openflags = OPEN_FLAGS, \
189 .no_cow = 0, \
190 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191}
192
193struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
194
195static int ubd0_init(void)
196{
197 struct ubd *dev = &ubd_dev[0];
198
199 if(dev->file == NULL)
200 dev->file = "root_fs";
201 return(0);
202}
203
204__initcall(ubd0_init);
205
206/* Only changed by fake_ide_setup which is a setup */
207static int fake_ide = 0;
208static struct proc_dir_entry *proc_ide_root = NULL;
209static struct proc_dir_entry *proc_ide = NULL;
210
211static void make_proc_ide(void)
212{
213 proc_ide_root = proc_mkdir("ide", NULL);
214 proc_ide = proc_mkdir("ide0", proc_ide_root);
215}
216
/*
 * read_proc handler for the fake "media" file: the content is always
 * "disk\n".  Copies it into @page, points *start at @off within the page
 * and returns how many bytes are available from there, at most @count.
 * *eof is set when fewer than @count bytes remain.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		/* nothing left at this offset; *start is left untouched */
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
233
234static void make_ide_entries(char *dev_name)
235{
236 struct proc_dir_entry *dir, *ent;
237 char name[64];
238
239 if(proc_ide_root == NULL) make_proc_ide();
240
241 dir = proc_mkdir(dev_name, proc_ide);
242 if(!dir) return;
243
244 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
245 if(!ent) return;
246 ent->nlink = 1;
247 ent->data = NULL;
248 ent->read_proc = proc_ide_read_media;
249 ent->write_proc = NULL;
250 sprintf(name,"ide0/%s", dev_name);
251 proc_symlink(dev_name, proc_ide_root, name);
252}
253
254static int fake_ide_setup(char *str)
255{
256 fake_ide = 1;
257 return(1);
258}
259
260__setup("fake_ide", fake_ide_setup);
261
262__uml_help(fake_ide_setup,
263"fake_ide\n"
264" Create ide0 entries that map onto ubd devices.\n\n"
265);
266
/*
 * Parse a unit at *ptr: either a number (simple_strtoul, base auto-detected)
 * or a single letter 'a'..'h' meaning 0..7.  On success *ptr is advanced
 * past the unit and the unit is returned; otherwise -1 is returned and
 * *ptr is left alone.  The range is not checked against MAX_DEV here.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr, *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'h'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
285
286static int ubd_setup_common(char *str, int *index_out)
287{
288 struct ubd *dev;
289 struct openflags flags = global_openflags;
290 char *backing_file;
291 int n, err, i;
292
293 if(index_out) *index_out = -1;
294 n = *str;
295 if(n == '='){
296 char *end;
297 int major;
298
299 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 if(!strcmp(str, "sync")){
301 global_openflags = of_sync(global_openflags);
302 return(0);
303 }
304 major = simple_strtoul(str, &end, 0);
305 if((*end != '\0') || (end == str)){
306 printk(KERN_ERR
307 "ubd_setup : didn't parse major number\n");
308 return(1);
309 }
310
311 err = 1;
312 spin_lock(&ubd_lock);
313 if(fake_major != MAJOR_NR){
314 printk(KERN_ERR "Can't assign a fake major twice\n");
315 goto out1;
316 }
317
318 fake_major = major;
319
320 printk(KERN_INFO "Setting extra ubd major number to %d\n",
321 major);
322 err = 0;
323 out1:
324 spin_unlock(&ubd_lock);
325 return(err);
326 }
327
328 n = parse_unit(&str);
329 if(n < 0){
330 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
331 "'%s'\n", str);
332 return(1);
333 }
334 if(n >= MAX_DEV){
335 printk(KERN_ERR "ubd_setup : index %d out of range "
336 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
337 return(1);
338 }
339
340 err = 1;
341 spin_lock(&ubd_lock);
342
343 dev = &ubd_dev[n];
344 if(dev->file != NULL){
345 printk(KERN_ERR "ubd_setup : device already configured\n");
346 goto out;
347 }
348
349 if (index_out)
350 *index_out = n;
351
352 for (i = 0; i < 4; i++) {
353 switch (*str) {
354 case 'r':
355 flags.w = 0;
356 break;
357 case 's':
358 flags.s = 1;
359 break;
360 case 'd':
361 dev->no_cow = 1;
362 break;
363 case '=':
364 str++;
365 goto break_loop;
366 default:
367 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
368 goto out;
369 }
370 str++;
371 }
372
373 if (*str == '=')
374 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
375 else
376 printk(KERN_ERR "ubd_setup : Expected '='\n");
377 goto out;
378
379break_loop:
380 err = 0;
381 backing_file = strchr(str, ',');
382
383 if (!backing_file) {
384 backing_file = strchr(str, ':');
385 }
386
387 if(backing_file){
388 if(dev->no_cow)
389 printk(KERN_ERR "Can't specify both 'd' and a "
390 "cow file\n");
391 else {
392 *backing_file = '\0';
393 backing_file++;
394 }
395 }
396 dev->file = str;
397 dev->cow.file = backing_file;
398 dev->boot_openflags = flags;
399out:
400 spin_unlock(&ubd_lock);
401 return(err);
402}
403
404static int ubd_setup(char *str)
405{
406 ubd_setup_common(str, NULL);
407 return(1);
408}
409
410__setup("ubd", ubd_setup);
411__uml_help(ubd_setup,
412"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
413" This is used to associate a device with a file in the underlying\n"
414" filesystem. When specifying two filenames, the first one is the\n"
415" COW name and the second is the backing file name. As separator you can\n"
416" use either a ':' or a ',': the first one allows writing things like;\n"
417" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
418" while with a ',' the shell would not expand the 2nd '~'.\n"
419" When using only one filename, UML will detect whether to thread it like\n"
420" a COW file or a backing file. To override this detection, add the 'd'\n"
421" flag:\n"
422" ubd0d=BackingFile\n"
423" Usually, there is a filesystem in the file, but \n"
424" that's not required. Swap devices containing swap files can be\n"
425" specified like this. Also, a file which doesn't contain a\n"
426" filesystem can have its contents read in the virtual \n"
427" machine by running 'dd' on the device. <n> must be in the range\n"
428" 0 to 7. Appending an 'r' to the number will cause that device\n"
429" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
430" an 's' will cause data to be written to disk on the host immediately.\n\n"
431);
432
433static int udb_setup(char *str)
434{
435 printk("udb%s specified on command line is almost certainly a ubd -> "
436 "udb TYPO\n", str);
437 return(1);
438}
439
440__setup("udb", udb_setup);
441__uml_help(udb_setup,
442"udb\n"
443" This option is here solely to catch ubd -> udb typos, which can be\n\n"
444" to impossible to catch visually unless you specifically look for\n\n"
445" them. The only result of any option starting with 'udb' is an error\n\n"
446" in the boot output.\n\n"
447);
448
449static int fakehd_set = 0;
450static int fakehd(char *str)
451{
452 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
453 fakehd_set = 1;
454 return 1;
455}
456
457__setup("fakehd", fakehd);
458__uml_help(fakehd,
459"fakehd\n"
460" Change the ubd device name to \"hd\".\n\n"
461);
462
463static void do_ubd_request(request_queue_t * q);
464
465/* Only changed by ubd_init, which is an initcall. */
466int thread_fd = -1;
467
468/* Changed by ubd_handler, which is serialized because interrupts only
469 * happen on CPU 0.
470 */
471int intr_count = 0;
472
473/* call ubd_finish if you need to serialize */
474static void __ubd_finish(struct request *req, int error)
475{
476 int nsect;
477
478 if(error){
479 end_request(req, 0);
480 return;
481 }
482 nsect = req->current_nr_sectors;
483 req->sector += nsect;
484 req->buffer += nsect << 9;
485 req->errors = 0;
486 req->nr_sectors -= nsect;
487 req->current_nr_sectors = 0;
488 end_request(req, 1);
489}
490
491static inline void ubd_finish(struct request *req, int error)
492{
493 spin_lock(&ubd_io_lock);
494 __ubd_finish(req, error);
495 spin_unlock(&ubd_io_lock);
496}
497
498/* Called without ubd_io_lock held */
499static void ubd_handler(void)
500{
501 struct io_thread_req req;
502 struct request *rq = elv_next_request(ubd_queue);
Jeff Dike7b9014c2005-05-20 13:59:11 -0700503 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
505 do_ubd = NULL;
506 intr_count++;
507 n = os_read_file(thread_fd, &req, sizeof(req));
508 if(n != sizeof(req)){
509 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
510 "err = %d\n", os_getpid(), -n);
511 spin_lock(&ubd_io_lock);
512 end_request(rq, 0);
513 spin_unlock(&ubd_io_lock);
514 return;
515 }
516
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 ubd_finish(rq, req.error);
518 reactivate_fd(thread_fd, UBD_IRQ);
519 do_ubd_request(ubd_queue);
520}
521
522static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
523{
524 ubd_handler();
525 return(IRQ_HANDLED);
526}
527
/* Pid of the I/O thread, -1 when there is none.
 * Only changed by ubd_init, which is an initcall.
 */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
538
539static int ubd_file_size(struct ubd *dev, __u64 *size_out)
540{
541 char *file;
542
543 file = dev->cow.file ? dev->cow.file : dev->file;
544 return(os_file_size(file, size_out));
545}
546
547static void ubd_close(struct ubd *dev)
548{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 os_close_file(dev->fd);
550 if(dev->cow.file == NULL)
551 return;
552
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 os_close_file(dev->cow.fd);
554 vfree(dev->cow.bitmap);
555 dev->cow.bitmap = NULL;
556}
557
558static int ubd_open_dev(struct ubd *dev)
559{
560 struct openflags flags;
561 char **back_ptr;
562 int err, create_cow, *create_ptr;
563
564 dev->openflags = dev->boot_openflags;
565 create_cow = 0;
566 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
567 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
568 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
569 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
570 &dev->cow.data_offset, create_ptr);
571
572 if((dev->fd == -ENOENT) && create_cow){
573 dev->fd = create_cow_file(dev->file, dev->cow.file,
574 dev->openflags, 1 << 9, PAGE_SIZE,
575 &dev->cow.bitmap_offset,
576 &dev->cow.bitmap_len,
577 &dev->cow.data_offset);
578 if(dev->fd >= 0){
579 printk(KERN_INFO "Creating \"%s\" as COW file for "
580 "\"%s\"\n", dev->file, dev->cow.file);
581 }
582 }
583
584 if(dev->fd < 0){
585 printk("Failed to open '%s', errno = %d\n", dev->file,
586 -dev->fd);
587 return(dev->fd);
588 }
589
590 if(dev->cow.file != NULL){
591 err = -ENOMEM;
592 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
593 if(dev->cow.bitmap == NULL){
594 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
595 goto error;
596 }
597 flush_tlb_kernel_vm();
598
599 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
600 dev->cow.bitmap_offset,
601 dev->cow.bitmap_len);
602 if(err < 0)
603 goto error;
604
605 flags = dev->openflags;
606 flags.w = 0;
607 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
608 NULL, NULL);
609 if(err < 0) goto error;
610 dev->cow.fd = err;
611 }
612 return(0);
613 error:
614 os_close_file(dev->fd);
615 return(err);
616}
617
618static int ubd_new_disk(int major, u64 size, int unit,
619 struct gendisk **disk_out)
620
621{
622 struct gendisk *disk;
623 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
624 int err;
625
626 disk = alloc_disk(1 << UBD_SHIFT);
627 if(disk == NULL)
628 return(-ENOMEM);
629
630 disk->major = major;
631 disk->first_minor = unit << UBD_SHIFT;
632 disk->fops = &ubd_blops;
633 set_capacity(disk, size / 512);
634 if(major == MAJOR_NR){
635 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
636 sprintf(disk->devfs_name, "ubd/disc%d", unit);
637 sprintf(from, "ubd/%d", unit);
638 sprintf(to, "disc%d/disc", unit);
639 err = devfs_mk_symlink(from, to);
640 if(err)
641 printk("ubd_new_disk failed to make link from %s to "
642 "%s, error = %d\n", from, to, err);
643 }
644 else {
645 sprintf(disk->disk_name, "ubd_fake%d", unit);
646 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
647 }
648
649 /* sysfs register (not for ide fake devices) */
650 if (major == MAJOR_NR) {
651 ubd_dev[unit].pdev.id = unit;
652 ubd_dev[unit].pdev.name = DRIVER_NAME;
653 platform_device_register(&ubd_dev[unit].pdev);
654 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
655 }
656
657 disk->private_data = &ubd_dev[unit];
658 disk->queue = ubd_queue;
659 add_disk(disk);
660
661 *disk_out = disk;
662 return 0;
663}
664
/*
 * Round n up to the next multiple of the 512-byte block size.
 * The argument is parenthesized so that expressions can be passed safely,
 * and the mask is built with ~ rather than by left-shifting -1, which is
 * undefined behaviour in C.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
666
667static int ubd_add(int n)
668{
669 struct ubd *dev = &ubd_dev[n];
670 int err;
671
672 if(dev->file == NULL)
673 return(-ENODEV);
674
675 if (ubd_open_dev(dev))
676 return(-ENODEV);
677
678 err = ubd_file_size(dev, &dev->size);
679 if(err < 0)
680 return(err);
681
682 dev->size = ROUND_BLOCK(dev->size);
683
684 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
685 if(err)
686 return(err);
687
688 if(fake_major != MAJOR_NR)
689 ubd_new_disk(fake_major, dev->size, n,
690 &fake_gendisk[n]);
691
692 /* perhaps this should also be under the "if (fake_major)" above */
693 /* using the fake_disk->disk_name and also the fakehd_set name */
694 if (fake_ide)
695 make_ide_entries(ubd_gendisk[n]->disk_name);
696
697 ubd_close(dev);
698 return 0;
699}
700
701static int ubd_config(char *str)
702{
703 int n, err;
704
705 str = uml_strdup(str);
706 if(str == NULL){
707 printk(KERN_ERR "ubd_config failed to strdup string\n");
708 return(1);
709 }
710 err = ubd_setup_common(str, &n);
711 if(err){
712 kfree(str);
713 return(-1);
714 }
715 if(n == -1) return(0);
716
717 spin_lock(&ubd_lock);
718 err = ubd_add(n);
719 if(err)
720 ubd_dev[n].file = NULL;
721 spin_unlock(&ubd_lock);
722
723 return(err);
724}
725
726static int ubd_get_config(char *name, char *str, int size, char **error_out)
727{
728 struct ubd *dev;
729 int n, len = 0;
730
731 n = parse_unit(&name);
732 if((n >= MAX_DEV) || (n < 0)){
733 *error_out = "ubd_get_config : device number out of range";
734 return(-1);
735 }
736
737 dev = &ubd_dev[n];
738 spin_lock(&ubd_lock);
739
740 if(dev->file == NULL){
741 CONFIG_CHUNK(str, size, len, "", 1);
742 goto out;
743 }
744
745 CONFIG_CHUNK(str, size, len, dev->file, 0);
746
747 if(dev->cow.file != NULL){
748 CONFIG_CHUNK(str, size, len, ",", 0);
749 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
750 }
751 else CONFIG_CHUNK(str, size, len, "", 1);
752
753 out:
754 spin_unlock(&ubd_lock);
755 return(len);
756}
757
758static int ubd_remove(char *str)
759{
760 struct ubd *dev;
761 int n, err = -ENODEV;
762
763 n = parse_unit(&str);
764
765 if((n < 0) || (n >= MAX_DEV))
766 return(err);
767
768 dev = &ubd_dev[n];
769 if(dev->count > 0)
770 return(-EBUSY); /* you cannot remove a open disk */
771
772 err = 0;
773 spin_lock(&ubd_lock);
774
775 if(ubd_gendisk[n] == NULL)
776 goto out;
777
778 del_gendisk(ubd_gendisk[n]);
779 put_disk(ubd_gendisk[n]);
780 ubd_gendisk[n] = NULL;
781
782 if(fake_gendisk[n] != NULL){
783 del_gendisk(fake_gendisk[n]);
784 put_disk(fake_gendisk[n]);
785 fake_gendisk[n] = NULL;
786 }
787
788 platform_device_unregister(&dev->pdev);
789 *dev = ((struct ubd) DEFAULT_UBD);
790 err = 0;
791 out:
792 spin_unlock(&ubd_lock);
793 return(err);
794}
795
796static struct mc_device ubd_mc = {
797 .name = "ubd",
798 .config = ubd_config,
799 .get_config = ubd_get_config,
800 .remove = ubd_remove,
801};
802
803static int ubd_mc_init(void)
804{
805 mconsole_register_dev(&ubd_mc);
806 return 0;
807}
808
809__initcall(ubd_mc_init);
810
811static struct device_driver ubd_driver = {
812 .name = DRIVER_NAME,
813 .bus = &platform_bus_type,
814};
815
816int ubd_init(void)
817{
818 int i;
819
820 devfs_mk_dir("ubd");
821 if (register_blkdev(MAJOR_NR, "ubd"))
822 return -1;
823
824 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
825 if (!ubd_queue) {
826 unregister_blkdev(MAJOR_NR, "ubd");
827 return -1;
828 }
829
830 if (fake_major != MAJOR_NR) {
831 char name[sizeof("ubd_nnn\0")];
832
833 snprintf(name, sizeof(name), "ubd_%d", fake_major);
834 devfs_mk_dir(name);
835 if (register_blkdev(fake_major, "ubd"))
836 return -1;
837 }
838 driver_register(&ubd_driver);
839 for (i = 0; i < MAX_DEV; i++)
840 ubd_add(i);
841 return 0;
842}
843
844late_initcall(ubd_init);
845
846int ubd_driver_init(void){
847 unsigned long stack;
848 int err;
849
850 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
851 if(global_openflags.s){
852 printk(KERN_INFO "ubd: Synchronous mode\n");
853 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
854 * enough. So use anyway the io thread. */
855 }
856 stack = alloc_stack(0, 0);
857 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
858 &thread_fd);
859 if(io_pid < 0){
860 printk(KERN_ERR
861 "ubd : Failed to start I/O thread (errno = %d) - "
862 "falling back to synchronous I/O\n", -io_pid);
863 io_pid = -1;
864 return(0);
865 }
866 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
867 SA_INTERRUPT, "ubd", ubd_dev);
868 if(err != 0)
869 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
870 return(err);
871}
872
873device_initcall(ubd_driver_init);
874
875static int ubd_open(struct inode *inode, struct file *filp)
876{
877 struct gendisk *disk = inode->i_bdev->bd_disk;
878 struct ubd *dev = disk->private_data;
879 int err = 0;
880
881 if(dev->count == 0){
882 err = ubd_open_dev(dev);
883 if(err){
884 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
885 disk->disk_name, dev->file, -err);
886 goto out;
887 }
888 }
889 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700890 set_disk_ro(disk, !dev->openflags.w);
891
892 /* This should no more be needed. And it didn't work anyway to exclude
893 * read-write remounting of filesystems.*/
894 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 if(--dev->count == 0) ubd_close(dev);
896 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700897 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 out:
899 return(err);
900}
901
902static int ubd_release(struct inode * inode, struct file * file)
903{
904 struct gendisk *disk = inode->i_bdev->bd_disk;
905 struct ubd *dev = disk->private_data;
906
907 if(--dev->count == 0)
908 ubd_close(dev);
909 return(0);
910}
911
912static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
913 __u64 *cow_offset, unsigned long *bitmap,
914 __u64 bitmap_offset, unsigned long *bitmap_words,
915 __u64 bitmap_len)
916{
917 __u64 sector = io_offset >> 9;
918 int i, update_bitmap = 0;
919
920 for(i = 0; i < length >> 9; i++){
921 if(cow_mask != NULL)
922 ubd_set_bit(i, (unsigned char *) cow_mask);
923 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
924 continue;
925
926 update_bitmap = 1;
927 ubd_set_bit(sector + i, (unsigned char *) bitmap);
928 }
929
930 if(!update_bitmap)
931 return;
932
933 *cow_offset = sector / (sizeof(unsigned long) * 8);
934
935 /* This takes care of the case where we're exactly at the end of the
936 * device, and *cow_offset + 1 is off the end. So, just back it up
937 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
938 * for the original diagnosis.
939 */
940 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
941 sizeof(unsigned long) - 1))
942 (*cow_offset)--;
943
944 bitmap_words[0] = bitmap[*cow_offset];
945 bitmap_words[1] = bitmap[*cow_offset + 1];
946
947 *cow_offset *= sizeof(unsigned long);
948 *cow_offset += bitmap_offset;
949}
950
951static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
952 __u64 bitmap_offset, __u64 bitmap_len)
953{
954 __u64 sector = req->offset >> 9;
955 int i;
956
957 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
958 panic("Operation too long");
959
960 if(req->op == UBD_READ) {
961 for(i = 0; i < req->length >> 9; i++){
962 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
963 ubd_set_bit(i, (unsigned char *)
964 &req->sector_mask);
965 }
966 }
967 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
968 &req->cow_offset, bitmap, bitmap_offset,
969 req->bitmap_words, bitmap_len);
970}
971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972/* Called with ubd_io_lock held */
973static int prepare_request(struct request *req, struct io_thread_req *io_req)
974{
975 struct gendisk *disk = req->rq_disk;
976 struct ubd *dev = disk->private_data;
977 __u64 offset;
Jeff Dike7b9014c2005-05-20 13:59:11 -0700978 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979
980 if(req->rq_status == RQ_INACTIVE) return(1);
981
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700982 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
984 printk("Write attempted on readonly ubd device %s\n",
985 disk->disk_name);
986 end_request(req, 0);
987 return(1);
988 }
989
990 offset = ((__u64) req->sector) << 9;
991 len = req->current_nr_sectors << 9;
992
993 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
994 io_req->fds[1] = dev->fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 io_req->cow_offset = -1;
996 io_req->offset = offset;
997 io_req->length = len;
998 io_req->error = 0;
999 io_req->sector_mask = 0;
1000
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1002 io_req->offsets[0] = 0;
1003 io_req->offsets[1] = dev->cow.data_offset;
1004 io_req->buffer = req->buffer;
1005 io_req->sectorsize = 1 << 9;
1006
1007 if(dev->cow.file != NULL)
1008 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1009 dev->cow.bitmap_len);
1010
1011 return(0);
1012}
1013
1014/* Called with ubd_io_lock held */
1015static void do_ubd_request(request_queue_t *q)
1016{
1017 struct io_thread_req io_req;
1018 struct request *req;
1019 int err, n;
1020
1021 if(thread_fd == -1){
1022 while((req = elv_next_request(q)) != NULL){
1023 err = prepare_request(req, &io_req);
1024 if(!err){
1025 do_io(&io_req);
1026 __ubd_finish(req, io_req.error);
1027 }
1028 }
1029 }
1030 else {
1031 if(do_ubd || (req = elv_next_request(q)) == NULL)
1032 return;
1033 err = prepare_request(req, &io_req);
1034 if(!err){
1035 do_ubd = ubd_handler;
1036 n = os_write_file(thread_fd, (char *) &io_req,
1037 sizeof(io_req));
1038 if(n != sizeof(io_req))
1039 printk("write to io thread failed, "
1040 "errno = %d\n", -n);
1041 }
1042 }
1043}
1044
1045static int ubd_ioctl(struct inode * inode, struct file * file,
1046 unsigned int cmd, unsigned long arg)
1047{
1048 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1049 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1050 struct hd_driveid ubd_id = {
1051 .cyls = 0,
1052 .heads = 128,
1053 .sectors = 32,
1054 };
1055
1056 switch (cmd) {
1057 struct hd_geometry g;
1058 struct cdrom_volctrl volume;
1059 case HDIO_GETGEO:
1060 if(!loc) return(-EINVAL);
1061 g.heads = 128;
1062 g.sectors = 32;
1063 g.cylinders = dev->size / (128 * 32 * 512);
1064 g.start = get_start_sect(inode->i_bdev);
1065 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1066
1067 case HDIO_GET_IDENTITY:
1068 ubd_id.cyls = dev->size / (128 * 32 * 512);
1069 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1070 sizeof(ubd_id)))
1071 return(-EFAULT);
1072 return(0);
1073
1074 case CDROMVOLREAD:
1075 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1076 return(-EFAULT);
1077 volume.channel0 = 255;
1078 volume.channel1 = 255;
1079 volume.channel2 = 255;
1080 volume.channel3 = 255;
1081 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1082 return(-EFAULT);
1083 return(0);
1084 }
1085 return(-EINVAL);
1086}
1087
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1089{
1090 struct uml_stat buf1, buf2;
1091 int err;
1092
1093 if(from_cmdline == NULL) return(1);
1094 if(!strcmp(from_cmdline, from_cow)) return(1);
1095
1096 err = os_stat_file(from_cmdline, &buf1);
1097 if(err < 0){
1098 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1099 return(1);
1100 }
1101 err = os_stat_file(from_cow, &buf2);
1102 if(err < 0){
1103 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1104 return(1);
1105 }
1106 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1107 return(1);
1108
1109 printk("Backing file mismatch - \"%s\" requested,\n"
1110 "\"%s\" specified in COW header of \"%s\"\n",
1111 from_cmdline, from_cow, cow);
1112 return(0);
1113}
1114
1115static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1116{
1117 unsigned long modtime;
1118 long long actual;
1119 int err;
1120
1121 err = os_file_modtime(file, &modtime);
1122 if(err < 0){
1123 printk("Failed to get modification time of backing file "
1124 "\"%s\", err = %d\n", file, -err);
1125 return(err);
1126 }
1127
1128 err = os_file_size(file, &actual);
1129 if(err < 0){
1130 printk("Failed to get size of backing file \"%s\", "
1131 "err = %d\n", file, -err);
1132 return(err);
1133 }
1134
1135 if(actual != size){
1136 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1137 * the typecast.*/
1138 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1139 "file\n", (unsigned long long) size, actual);
1140 return(-EINVAL);
1141 }
1142 if(modtime != mtime){
1143 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1144 "file\n", mtime, modtime);
1145 return(-EINVAL);
1146 }
1147 return(0);
1148}
1149
1150int read_cow_bitmap(int fd, void *buf, int offset, int len)
1151{
1152 int err;
1153
1154 err = os_seek_file(fd, offset);
1155 if(err < 0)
1156 return(err);
1157
1158 err = os_read_file(fd, buf, len);
1159 if(err < 0)
1160 return(err);
1161
1162 return(0);
1163}
1164
1165int open_ubd_file(char *file, struct openflags *openflags,
1166 char **backing_file_out, int *bitmap_offset_out,
1167 unsigned long *bitmap_len_out, int *data_offset_out,
1168 int *create_cow_out)
1169{
1170 time_t mtime;
1171 unsigned long long size;
1172 __u32 version, align;
1173 char *backing_file;
1174 int fd, err, sectorsize, same, mode = 0644;
1175
1176 fd = os_open_file(file, *openflags, mode);
1177 if(fd < 0){
1178 if((fd == -ENOENT) && (create_cow_out != NULL))
1179 *create_cow_out = 1;
1180 if(!openflags->w ||
1181 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1182 openflags->w = 0;
1183 fd = os_open_file(file, *openflags, mode);
1184 if(fd < 0)
1185 return(fd);
1186 }
1187
1188 err = os_lock_file(fd, openflags->w);
1189 if(err < 0){
1190 printk("Failed to lock '%s', err = %d\n", file, -err);
1191 goto out_close;
1192 }
1193
1194 if(backing_file_out == NULL) return(fd);
1195
1196 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1197 &size, &sectorsize, &align, bitmap_offset_out);
1198 if(err && (*backing_file_out != NULL)){
1199 printk("Failed to read COW header from COW file \"%s\", "
1200 "errno = %d\n", file, -err);
1201 goto out_close;
1202 }
1203 if(err) return(fd);
1204
1205 if(backing_file_out == NULL) return(fd);
1206
1207 same = same_backing_files(*backing_file_out, backing_file, file);
1208
1209 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1210 printk("Switching backing file to '%s'\n", *backing_file_out);
1211 err = write_cow_header(file, fd, *backing_file_out,
1212 sectorsize, align, &size);
1213 if(err){
1214 printk("Switch failed, errno = %d\n", -err);
1215 return(err);
1216 }
1217 }
1218 else {
1219 *backing_file_out = backing_file;
1220 err = backing_file_mismatch(*backing_file_out, size, mtime);
1221 if(err) goto out_close;
1222 }
1223
1224 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1225 bitmap_len_out, data_offset_out);
1226
1227 return(fd);
1228 out_close:
1229 os_close_file(fd);
1230 return(err);
1231}
1232
1233int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1234 int sectorsize, int alignment, int *bitmap_offset_out,
1235 unsigned long *bitmap_len_out, int *data_offset_out)
1236{
1237 int err, fd;
1238
1239 flags.c = 1;
1240 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1241 if(fd < 0){
1242 err = fd;
1243 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1244 -err);
1245 goto out;
1246 }
1247
1248 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1249 bitmap_offset_out, bitmap_len_out,
1250 data_offset_out);
1251 if(!err)
1252 return(fd);
1253 os_close_file(fd);
1254 out:
1255 return(err);
1256}
1257
1258static int update_bitmap(struct io_thread_req *req)
1259{
1260 int n;
1261
1262 if(req->cow_offset == -1)
1263 return(0);
1264
1265 n = os_seek_file(req->fds[1], req->cow_offset);
1266 if(n < 0){
1267 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1268 return(1);
1269 }
1270
1271 n = os_write_file(req->fds[1], &req->bitmap_words,
1272 sizeof(req->bitmap_words));
1273 if(n != sizeof(req->bitmap_words)){
1274 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1275 req->fds[1]);
1276 return(1);
1277 }
1278
1279 return(0);
1280}
1281
1282void do_io(struct io_thread_req *req)
1283{
1284 char *buf;
1285 unsigned long len;
1286 int n, nsectors, start, end, bit;
1287 int err;
1288 __u64 off;
1289
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 nsectors = req->length / req->sectorsize;
1291 start = 0;
1292 do {
1293 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1294 end = start;
1295 while((end < nsectors) &&
1296 (ubd_test_bit(end, (unsigned char *)
1297 &req->sector_mask) == bit))
1298 end++;
1299
1300 off = req->offset + req->offsets[bit] +
1301 start * req->sectorsize;
1302 len = (end - start) * req->sectorsize;
1303 buf = &req->buffer[start * req->sectorsize];
1304
1305 err = os_seek_file(req->fds[bit], off);
1306 if(err < 0){
1307 printk("do_io - lseek failed : err = %d\n", -err);
1308 req->error = 1;
1309 return;
1310 }
1311 if(req->op == UBD_READ){
1312 n = 0;
1313 do {
1314 buf = &buf[n];
1315 len -= n;
1316 n = os_read_file(req->fds[bit], buf, len);
1317 if (n < 0) {
1318 printk("do_io - read failed, err = %d "
1319 "fd = %d\n", -n, req->fds[bit]);
1320 req->error = 1;
1321 return;
1322 }
1323 } while((n < len) && (n != 0));
1324 if (n < len) memset(&buf[n], 0, len - n);
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001325 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 n = os_write_file(req->fds[bit], buf, len);
1327 if(n != len){
1328 printk("do_io - write failed err = %d "
1329 "fd = %d\n", -n, req->fds[bit]);
1330 req->error = 1;
1331 return;
1332 }
1333 }
1334
1335 start = end;
1336 } while(start < nsectors);
1337
1338 req->error = update_bitmap(req);
1339}
1340
/*
 * Descriptor the io thread reads requests from and writes completed
 * requests back to.  It is only changed in start_io_thread, which is
 * serialized by being called only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Number of requests the io thread has picked up; only it changes this */
int io_count = 0;
1348
1349int io_thread(void *arg)
1350{
1351 struct io_thread_req req;
1352 int n;
1353
1354 ignore_sigwinch_sig();
1355 while(1){
1356 n = os_read_file(kernel_fd, &req, sizeof(req));
1357 if(n != sizeof(req)){
1358 if(n < 0)
1359 printk("io_thread - read failed, fd = %d, "
1360 "err = %d\n", kernel_fd, -n);
1361 else {
1362 printk("io_thread - short read, fd = %d, "
1363 "length = %d\n", kernel_fd, n);
1364 }
1365 continue;
1366 }
1367 io_count++;
1368 do_io(&req);
1369 n = os_write_file(kernel_fd, &req, sizeof(req));
1370 if(n != sizeof(req))
1371 printk("io_thread - write failed, fd = %d, err = %d\n",
1372 kernel_fd, -n);
1373 }
1374}
1375
1376/*
1377 * Overrides for Emacs so that we follow Linus's tabbing style.
1378 * Emacs will notice this stuff at the end of the file and automatically
1379 * adjust the settings for this buffer only. This must remain at the end
1380 * of the file.
1381 * ---------------------------------------------------------------------------
1382 * Local variables:
1383 * c-file-style: "linux"
1384 * End:
1385 */