blob: f73134333f64ee55c9eb7f87521e89f577400297 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
/* Direction of a transfer described by an io_thread_req. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * One unit of I/O.  prepare_request() fills it in; do_ubd_request() then
 * either passes it to do_io() directly or writes it to thread_fd, and
 * ubd_handler() reads a structure of this type back and inspects ->error.
 */
struct io_thread_req {
	/* UBD_READ or UBD_WRITE */
	enum ubd_req op;
	/* [0]: backing-file fd for COW devices, otherwise the device fd;
	 * [1]: always the device fd */
	int fds[2];
	/* [0] is 0, [1] is the COW data offset (see prepare_request) */
	unsigned long offsets[2];
	/* byte offset of the transfer (request sector << 9) */
	unsigned long long offset;
	/* transfer length in bytes */
	unsigned long length;
	/* data buffer taken from the block-layer request */
	char *buffer;
	/* always 1 << 9 in this file */
	int sectorsize;
	/* one bit per sector of the transfer, set up by cowify_req() */
	unsigned long sector_mask;
	/* -1 unless cowify_bitmap() had to update the COW bitmap */
	unsigned long long cow_offset;
	/* the two bitmap words that go with cow_offset */
	unsigned long bitmap_words[2];
	/* non-zero makes __ubd_finish() fail the request */
	int error;
};
72
/* Prototypes for helpers used by this driver. */
extern int open_ubd_file(char *file,
			 struct openflags *openflags,
			 char **backing_file_out,
			 int *bitmap_offset_out,
			 unsigned long *bitmap_len_out,
			 int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file,
			   char *backing_file,
			   struct openflags flags,
			   int sectorsize,
			   int alignment,
			   int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
84
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bits are numbered from the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] >> shift) & 1;
}
95
/*
 * Set bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= 1 << shift;
}
106/*End stuff from ubd_user.h*/
107
/* Name shared by the platform devices and the device_driver below. */
#define DRIVER_NAME "uml-blkdev"

/* Number of ubd devices; also the size of the per-device arrays. */
#define MAX_DEV (8)

/* ubd_io_lock is the lock given to blk_init_queue() and taken around
 * request completion; ubd_lock is taken by the setup/config/remove paths.
 */
static DEFINE_SPINLOCK(ubd_io_lock);
static DEFINE_SPINLOCK(ubd_lock);

/* Set to ubd_handler while a request is outstanding at the I/O thread,
 * NULL otherwise (see do_ubd_request and ubd_handler).
 */
static void (*do_ubd)(void);

/* block_device_operations callbacks, defined further down */
static int ubd_open(struct inode *inode, struct file *filp);
static int ubd_release(struct inode *inode, struct file *file);
static int ubd_ioctl(struct inode *inode, struct file *file,
		     unsigned int cmd, unsigned long arg);
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static struct block_device_operations ubd_blops = {
123 .owner = THIS_MODULE,
124 .open = ubd_open,
125 .release = ubd_release,
126 .ioctl = ubd_ioctl,
127};
128
129/* Protected by the queue_lock */
130static request_queue_t *ubd_queue;
131
132/* Protected by ubd_lock */
133static int fake_major = MAJOR_NR;
134
135static struct gendisk *ubd_gendisk[MAX_DEV];
136static struct gendisk *fake_gendisk[MAX_DEV];
137
138#ifdef CONFIG_BLK_DEV_UBD_SYNC
139#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
140 .cl = 1 })
141#else
142#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
143 .cl = 1 })
144#endif
145
146/* Not protected - changed only in ubd_setup_common and then only to
147 * to enable O_SYNC.
148 */
149static struct openflags global_openflags = OPEN_FLAGS;
150
/* Copy-on-write state of one ubd device. */
struct cow {
	/* This is the backing file, actually */
	char *file;
	/* fd of the backing file, opened read-only by ubd_open_dev() */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* length handed to vmalloc() and read_cow_bitmap() */
	unsigned long bitmap_len;
	/* offset of the bitmap within the COW file */
	int bitmap_offset;
	/* copied into io_thread_req.offsets[1] for every request */
	int data_offset;
};
160
161struct ubd {
162 char *file;
163 int count;
164 int fd;
165 __u64 size;
166 struct openflags boot_openflags;
167 struct openflags openflags;
168 int no_cow;
169 struct cow cow;
170 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define DEFAULT_COW { \
174 .file = NULL, \
175 .fd = -1, \
176 .bitmap = NULL, \
177 .bitmap_offset = 0, \
178 .data_offset = 0, \
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
183 .count = 0, \
184 .fd = -1, \
185 .size = -1, \
186 .boot_openflags = OPEN_FLAGS, \
187 .openflags = OPEN_FLAGS, \
188 .no_cow = 0, \
189 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190}
191
192struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
193
194static int ubd0_init(void)
195{
196 struct ubd *dev = &ubd_dev[0];
197
198 if(dev->file == NULL)
199 dev->file = "root_fs";
200 return(0);
201}
202
203__initcall(ubd0_init);
204
205/* Only changed by fake_ide_setup which is a setup */
206static int fake_ide = 0;
207static struct proc_dir_entry *proc_ide_root = NULL;
208static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc callback for the "media" entry: the content is always
 * "disk\n".  The whole string is copied to 'page'; the return value is the
 * number of bytes available from offset 'off', capped at 'count'.  *eof is
 * set when fewer than 'count' bytes remain, and *start is left untouched
 * when nothing remains.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int avail = strlen("disk\n");

	strcpy(page, "disk\n");
	avail -= off;

	if (avail >= count) {
		/* caller's buffer is the limit; more may follow */
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (avail <= 0)
		return 0;

	*start = page + off;
	return avail;
}
232
233static void make_ide_entries(char *dev_name)
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
245 ent->nlink = 1;
246 ent->data = NULL;
247 ent->read_proc = proc_ide_read_media;
248 ent->write_proc = NULL;
249 sprintf(name,"ide0/%s", dev_name);
250 proc_symlink(dev_name, proc_ide_root, name);
251}
252
253static int fake_ide_setup(char *str)
254{
255 fake_ide = 1;
256 return(1);
257}
258
259__setup("fake_ide", fake_ide_setup);
260
261__uml_help(fake_ide_setup,
262"fake_ide\n"
263" Create ide0 entries that map onto ubd devices.\n\n"
264);
265
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'h' meaning 0..7.
 * On success *ptr is advanced past the specifier and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'h')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
284
285static int ubd_setup_common(char *str, int *index_out)
286{
287 struct ubd *dev;
288 struct openflags flags = global_openflags;
289 char *backing_file;
290 int n, err, i;
291
292 if(index_out) *index_out = -1;
293 n = *str;
294 if(n == '='){
295 char *end;
296 int major;
297
298 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 if(!strcmp(str, "sync")){
300 global_openflags = of_sync(global_openflags);
301 return(0);
302 }
303 major = simple_strtoul(str, &end, 0);
304 if((*end != '\0') || (end == str)){
305 printk(KERN_ERR
306 "ubd_setup : didn't parse major number\n");
307 return(1);
308 }
309
310 err = 1;
311 spin_lock(&ubd_lock);
312 if(fake_major != MAJOR_NR){
313 printk(KERN_ERR "Can't assign a fake major twice\n");
314 goto out1;
315 }
316
317 fake_major = major;
318
319 printk(KERN_INFO "Setting extra ubd major number to %d\n",
320 major);
321 err = 0;
322 out1:
323 spin_unlock(&ubd_lock);
324 return(err);
325 }
326
327 n = parse_unit(&str);
328 if(n < 0){
329 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
330 "'%s'\n", str);
331 return(1);
332 }
333 if(n >= MAX_DEV){
334 printk(KERN_ERR "ubd_setup : index %d out of range "
335 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
336 return(1);
337 }
338
339 err = 1;
340 spin_lock(&ubd_lock);
341
342 dev = &ubd_dev[n];
343 if(dev->file != NULL){
344 printk(KERN_ERR "ubd_setup : device already configured\n");
345 goto out;
346 }
347
348 if (index_out)
349 *index_out = n;
350
351 for (i = 0; i < 4; i++) {
352 switch (*str) {
353 case 'r':
354 flags.w = 0;
355 break;
356 case 's':
357 flags.s = 1;
358 break;
359 case 'd':
360 dev->no_cow = 1;
361 break;
362 case '=':
363 str++;
364 goto break_loop;
365 default:
366 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
367 goto out;
368 }
369 str++;
370 }
371
372 if (*str == '=')
373 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
374 else
375 printk(KERN_ERR "ubd_setup : Expected '='\n");
376 goto out;
377
378break_loop:
379 err = 0;
380 backing_file = strchr(str, ',');
381
382 if (!backing_file) {
383 backing_file = strchr(str, ':');
384 }
385
386 if(backing_file){
387 if(dev->no_cow)
388 printk(KERN_ERR "Can't specify both 'd' and a "
389 "cow file\n");
390 else {
391 *backing_file = '\0';
392 backing_file++;
393 }
394 }
395 dev->file = str;
396 dev->cow.file = backing_file;
397 dev->boot_openflags = flags;
398out:
399 spin_unlock(&ubd_lock);
400 return(err);
401}
402
403static int ubd_setup(char *str)
404{
405 ubd_setup_common(str, NULL);
406 return(1);
407}
408
409__setup("ubd", ubd_setup);
410__uml_help(ubd_setup,
411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412" This is used to associate a device with a file in the underlying\n"
413" filesystem. When specifying two filenames, the first one is the\n"
414" COW name and the second is the backing file name. As separator you can\n"
415" use either a ':' or a ',': the first one allows writing things like;\n"
416" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417" while with a ',' the shell would not expand the 2nd '~'.\n"
418" When using only one filename, UML will detect whether to thread it like\n"
419" a COW file or a backing file. To override this detection, add the 'd'\n"
420" flag:\n"
421" ubd0d=BackingFile\n"
422" Usually, there is a filesystem in the file, but \n"
423" that's not required. Swap devices containing swap files can be\n"
424" specified like this. Also, a file which doesn't contain a\n"
425" filesystem can have its contents read in the virtual \n"
426" machine by running 'dd' on the device. <n> must be in the range\n"
427" 0 to 7. Appending an 'r' to the number will cause that device\n"
428" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
429" an 's' will cause data to be written to disk on the host immediately.\n\n"
430);
431
432static int udb_setup(char *str)
433{
434 printk("udb%s specified on command line is almost certainly a ubd -> "
435 "udb TYPO\n", str);
436 return(1);
437}
438
439__setup("udb", udb_setup);
440__uml_help(udb_setup,
441"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700442" This option is here solely to catch ubd -> udb typos, which can be\n"
443" to impossible to catch visually unless you specifically look for\n"
444" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445" in the boot output.\n\n"
446);
447
448static int fakehd_set = 0;
449static int fakehd(char *str)
450{
451 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
452 fakehd_set = 1;
453 return 1;
454}
455
456__setup("fakehd", fakehd);
457__uml_help(fakehd,
458"fakehd\n"
459" Change the ubd device name to \"hd\".\n\n"
460);
461
462static void do_ubd_request(request_queue_t * q);
463
464/* Only changed by ubd_init, which is an initcall. */
465int thread_fd = -1;
466
467/* Changed by ubd_handler, which is serialized because interrupts only
468 * happen on CPU 0.
469 */
470int intr_count = 0;
471
472/* call ubd_finish if you need to serialize */
473static void __ubd_finish(struct request *req, int error)
474{
475 int nsect;
476
477 if(error){
478 end_request(req, 0);
479 return;
480 }
481 nsect = req->current_nr_sectors;
482 req->sector += nsect;
483 req->buffer += nsect << 9;
484 req->errors = 0;
485 req->nr_sectors -= nsect;
486 req->current_nr_sectors = 0;
487 end_request(req, 1);
488}
489
490static inline void ubd_finish(struct request *req, int error)
491{
492 spin_lock(&ubd_io_lock);
493 __ubd_finish(req, error);
494 spin_unlock(&ubd_io_lock);
495}
496
497/* Called without ubd_io_lock held */
498static void ubd_handler(void)
499{
500 struct io_thread_req req;
501 struct request *rq = elv_next_request(ubd_queue);
Jeff Dike7b9014c2005-05-20 13:59:11 -0700502 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
504 do_ubd = NULL;
505 intr_count++;
506 n = os_read_file(thread_fd, &req, sizeof(req));
507 if(n != sizeof(req)){
508 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
509 "err = %d\n", os_getpid(), -n);
510 spin_lock(&ubd_io_lock);
511 end_request(rq, 0);
512 spin_unlock(&ubd_io_lock);
513 return;
514 }
515
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 ubd_finish(rq, req.error);
517 reactivate_fd(thread_fd, UBD_IRQ);
518 do_ubd_request(ubd_queue);
519}
520
521static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
522{
523 ubd_handler();
524 return(IRQ_HANDLED);
525}
526
/* Pid of the I/O thread, -1 when there is none.
 * Only changed by ubd_init, which is an initcall.
 */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
537
538static int ubd_file_size(struct ubd *dev, __u64 *size_out)
539{
540 char *file;
541
542 file = dev->cow.file ? dev->cow.file : dev->file;
543 return(os_file_size(file, size_out));
544}
545
546static void ubd_close(struct ubd *dev)
547{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 os_close_file(dev->fd);
549 if(dev->cow.file == NULL)
550 return;
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 os_close_file(dev->cow.fd);
553 vfree(dev->cow.bitmap);
554 dev->cow.bitmap = NULL;
555}
556
557static int ubd_open_dev(struct ubd *dev)
558{
559 struct openflags flags;
560 char **back_ptr;
561 int err, create_cow, *create_ptr;
562
563 dev->openflags = dev->boot_openflags;
564 create_cow = 0;
565 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
566 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
567 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
568 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
569 &dev->cow.data_offset, create_ptr);
570
571 if((dev->fd == -ENOENT) && create_cow){
572 dev->fd = create_cow_file(dev->file, dev->cow.file,
573 dev->openflags, 1 << 9, PAGE_SIZE,
574 &dev->cow.bitmap_offset,
575 &dev->cow.bitmap_len,
576 &dev->cow.data_offset);
577 if(dev->fd >= 0){
578 printk(KERN_INFO "Creating \"%s\" as COW file for "
579 "\"%s\"\n", dev->file, dev->cow.file);
580 }
581 }
582
583 if(dev->fd < 0){
584 printk("Failed to open '%s', errno = %d\n", dev->file,
585 -dev->fd);
586 return(dev->fd);
587 }
588
589 if(dev->cow.file != NULL){
590 err = -ENOMEM;
591 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
592 if(dev->cow.bitmap == NULL){
593 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
594 goto error;
595 }
596 flush_tlb_kernel_vm();
597
598 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
599 dev->cow.bitmap_offset,
600 dev->cow.bitmap_len);
601 if(err < 0)
602 goto error;
603
604 flags = dev->openflags;
605 flags.w = 0;
606 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
607 NULL, NULL);
608 if(err < 0) goto error;
609 dev->cow.fd = err;
610 }
611 return(0);
612 error:
613 os_close_file(dev->fd);
614 return(err);
615}
616
617static int ubd_new_disk(int major, u64 size, int unit,
618 struct gendisk **disk_out)
619
620{
621 struct gendisk *disk;
622 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
623 int err;
624
625 disk = alloc_disk(1 << UBD_SHIFT);
626 if(disk == NULL)
627 return(-ENOMEM);
628
629 disk->major = major;
630 disk->first_minor = unit << UBD_SHIFT;
631 disk->fops = &ubd_blops;
632 set_capacity(disk, size / 512);
633 if(major == MAJOR_NR){
634 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
635 sprintf(disk->devfs_name, "ubd/disc%d", unit);
636 sprintf(from, "ubd/%d", unit);
637 sprintf(to, "disc%d/disc", unit);
638 err = devfs_mk_symlink(from, to);
639 if(err)
640 printk("ubd_new_disk failed to make link from %s to "
641 "%s, error = %d\n", from, to, err);
642 }
643 else {
644 sprintf(disk->disk_name, "ubd_fake%d", unit);
645 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
646 }
647
648 /* sysfs register (not for ide fake devices) */
649 if (major == MAJOR_NR) {
650 ubd_dev[unit].pdev.id = unit;
651 ubd_dev[unit].pdev.name = DRIVER_NAME;
652 platform_device_register(&ubd_dev[unit].pdev);
653 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
654 }
655
656 disk->private_data = &ubd_dev[unit];
657 disk->queue = ubd_queue;
658 add_disk(disk);
659
660 *disk_out = disk;
661 return 0;
662}
663
/* Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that expressions can be passed safely, and the mask is
 * built with ~ rather than by left-shifting -1, which is undefined.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
665
666static int ubd_add(int n)
667{
668 struct ubd *dev = &ubd_dev[n];
669 int err;
670
Jeff Dikeec7cf782005-09-03 15:57:29 -0700671 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 if(dev->file == NULL)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700673 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675 if (ubd_open_dev(dev))
Jeff Dikeec7cf782005-09-03 15:57:29 -0700676 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
678 err = ubd_file_size(dev, &dev->size);
679 if(err < 0)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700680 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
682 dev->size = ROUND_BLOCK(dev->size);
683
684 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
685 if(err)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700686 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687
688 if(fake_major != MAJOR_NR)
689 ubd_new_disk(fake_major, dev->size, n,
690 &fake_gendisk[n]);
691
692 /* perhaps this should also be under the "if (fake_major)" above */
693 /* using the fake_disk->disk_name and also the fakehd_set name */
694 if (fake_ide)
695 make_ide_entries(ubd_gendisk[n]->disk_name);
696
Jeff Dikeec7cf782005-09-03 15:57:29 -0700697 err = 0;
698out_close:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 ubd_close(dev);
Jeff Dikeec7cf782005-09-03 15:57:29 -0700700out:
701 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702}
703
704static int ubd_config(char *str)
705{
706 int n, err;
707
708 str = uml_strdup(str);
709 if(str == NULL){
710 printk(KERN_ERR "ubd_config failed to strdup string\n");
711 return(1);
712 }
713 err = ubd_setup_common(str, &n);
714 if(err){
715 kfree(str);
716 return(-1);
717 }
718 if(n == -1) return(0);
719
720 spin_lock(&ubd_lock);
721 err = ubd_add(n);
722 if(err)
723 ubd_dev[n].file = NULL;
724 spin_unlock(&ubd_lock);
725
726 return(err);
727}
728
729static int ubd_get_config(char *name, char *str, int size, char **error_out)
730{
731 struct ubd *dev;
732 int n, len = 0;
733
734 n = parse_unit(&name);
735 if((n >= MAX_DEV) || (n < 0)){
736 *error_out = "ubd_get_config : device number out of range";
737 return(-1);
738 }
739
740 dev = &ubd_dev[n];
741 spin_lock(&ubd_lock);
742
743 if(dev->file == NULL){
744 CONFIG_CHUNK(str, size, len, "", 1);
745 goto out;
746 }
747
748 CONFIG_CHUNK(str, size, len, dev->file, 0);
749
750 if(dev->cow.file != NULL){
751 CONFIG_CHUNK(str, size, len, ",", 0);
752 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
753 }
754 else CONFIG_CHUNK(str, size, len, "", 1);
755
756 out:
757 spin_unlock(&ubd_lock);
758 return(len);
759}
760
Jeff Dike29d56cf2005-06-25 14:55:25 -0700761static int ubd_id(char **str, int *start_out, int *end_out)
762{
763 int n;
764
765 n = parse_unit(str);
766 *start_out = 0;
767 *end_out = MAX_DEV - 1;
768 return n;
769}
770
771static int ubd_remove(int n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772{
773 struct ubd *dev;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700774 int err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
Jeff Dike29d56cf2005-06-25 14:55:25 -0700776 spin_lock(&ubd_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
778 if(ubd_gendisk[n] == NULL)
779 goto out;
780
Jeff Dike29d56cf2005-06-25 14:55:25 -0700781 dev = &ubd_dev[n];
782
783 if(dev->file == NULL)
784 goto out;
785
786 /* you cannot remove a open disk */
787 err = -EBUSY;
788 if(dev->count > 0)
789 goto out;
790
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 del_gendisk(ubd_gendisk[n]);
792 put_disk(ubd_gendisk[n]);
793 ubd_gendisk[n] = NULL;
794
795 if(fake_gendisk[n] != NULL){
796 del_gendisk(fake_gendisk[n]);
797 put_disk(fake_gendisk[n]);
798 fake_gendisk[n] = NULL;
799 }
800
801 platform_device_unregister(&dev->pdev);
802 *dev = ((struct ubd) DEFAULT_UBD);
803 err = 0;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700804out:
805 spin_unlock(&ubd_lock);
806 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807}
808
809static struct mc_device ubd_mc = {
810 .name = "ubd",
811 .config = ubd_config,
812 .get_config = ubd_get_config,
Jeff Dike29d56cf2005-06-25 14:55:25 -0700813 .id = ubd_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 .remove = ubd_remove,
815};
816
817static int ubd_mc_init(void)
818{
819 mconsole_register_dev(&ubd_mc);
820 return 0;
821}
822
823__initcall(ubd_mc_init);
824
825static struct device_driver ubd_driver = {
826 .name = DRIVER_NAME,
827 .bus = &platform_bus_type,
828};
829
830int ubd_init(void)
831{
832 int i;
833
834 devfs_mk_dir("ubd");
835 if (register_blkdev(MAJOR_NR, "ubd"))
836 return -1;
837
838 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
839 if (!ubd_queue) {
840 unregister_blkdev(MAJOR_NR, "ubd");
841 return -1;
842 }
843
844 if (fake_major != MAJOR_NR) {
845 char name[sizeof("ubd_nnn\0")];
846
847 snprintf(name, sizeof(name), "ubd_%d", fake_major);
848 devfs_mk_dir(name);
849 if (register_blkdev(fake_major, "ubd"))
850 return -1;
851 }
852 driver_register(&ubd_driver);
853 for (i = 0; i < MAX_DEV; i++)
854 ubd_add(i);
855 return 0;
856}
857
858late_initcall(ubd_init);
859
860int ubd_driver_init(void){
861 unsigned long stack;
862 int err;
863
864 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
865 if(global_openflags.s){
866 printk(KERN_INFO "ubd: Synchronous mode\n");
867 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
868 * enough. So use anyway the io thread. */
869 }
870 stack = alloc_stack(0, 0);
871 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
872 &thread_fd);
873 if(io_pid < 0){
874 printk(KERN_ERR
875 "ubd : Failed to start I/O thread (errno = %d) - "
876 "falling back to synchronous I/O\n", -io_pid);
877 io_pid = -1;
878 return(0);
879 }
880 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
881 SA_INTERRUPT, "ubd", ubd_dev);
882 if(err != 0)
883 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
884 return(err);
885}
886
887device_initcall(ubd_driver_init);
888
889static int ubd_open(struct inode *inode, struct file *filp)
890{
891 struct gendisk *disk = inode->i_bdev->bd_disk;
892 struct ubd *dev = disk->private_data;
893 int err = 0;
894
895 if(dev->count == 0){
896 err = ubd_open_dev(dev);
897 if(err){
898 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
899 disk->disk_name, dev->file, -err);
900 goto out;
901 }
902 }
903 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700904 set_disk_ro(disk, !dev->openflags.w);
905
906 /* This should no more be needed. And it didn't work anyway to exclude
907 * read-write remounting of filesystems.*/
908 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 if(--dev->count == 0) ubd_close(dev);
910 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700911 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 out:
913 return(err);
914}
915
916static int ubd_release(struct inode * inode, struct file * file)
917{
918 struct gendisk *disk = inode->i_bdev->bd_disk;
919 struct ubd *dev = disk->private_data;
920
921 if(--dev->count == 0)
922 ubd_close(dev);
923 return(0);
924}
925
926static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
927 __u64 *cow_offset, unsigned long *bitmap,
928 __u64 bitmap_offset, unsigned long *bitmap_words,
929 __u64 bitmap_len)
930{
931 __u64 sector = io_offset >> 9;
932 int i, update_bitmap = 0;
933
934 for(i = 0; i < length >> 9; i++){
935 if(cow_mask != NULL)
936 ubd_set_bit(i, (unsigned char *) cow_mask);
937 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
938 continue;
939
940 update_bitmap = 1;
941 ubd_set_bit(sector + i, (unsigned char *) bitmap);
942 }
943
944 if(!update_bitmap)
945 return;
946
947 *cow_offset = sector / (sizeof(unsigned long) * 8);
948
949 /* This takes care of the case where we're exactly at the end of the
950 * device, and *cow_offset + 1 is off the end. So, just back it up
951 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
952 * for the original diagnosis.
953 */
954 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
955 sizeof(unsigned long) - 1))
956 (*cow_offset)--;
957
958 bitmap_words[0] = bitmap[*cow_offset];
959 bitmap_words[1] = bitmap[*cow_offset + 1];
960
961 *cow_offset *= sizeof(unsigned long);
962 *cow_offset += bitmap_offset;
963}
964
965static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
966 __u64 bitmap_offset, __u64 bitmap_len)
967{
968 __u64 sector = req->offset >> 9;
969 int i;
970
971 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
972 panic("Operation too long");
973
974 if(req->op == UBD_READ) {
975 for(i = 0; i < req->length >> 9; i++){
976 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
977 ubd_set_bit(i, (unsigned char *)
978 &req->sector_mask);
979 }
980 }
981 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
982 &req->cow_offset, bitmap, bitmap_offset,
983 req->bitmap_words, bitmap_len);
984}
985
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986/* Called with ubd_io_lock held */
987static int prepare_request(struct request *req, struct io_thread_req *io_req)
988{
989 struct gendisk *disk = req->rq_disk;
990 struct ubd *dev = disk->private_data;
991 __u64 offset;
Jeff Dike7b9014c2005-05-20 13:59:11 -0700992 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993
994 if(req->rq_status == RQ_INACTIVE) return(1);
995
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700996 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
998 printk("Write attempted on readonly ubd device %s\n",
999 disk->disk_name);
1000 end_request(req, 0);
1001 return(1);
1002 }
1003
1004 offset = ((__u64) req->sector) << 9;
1005 len = req->current_nr_sectors << 9;
1006
1007 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1008 io_req->fds[1] = dev->fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 io_req->cow_offset = -1;
1010 io_req->offset = offset;
1011 io_req->length = len;
1012 io_req->error = 0;
1013 io_req->sector_mask = 0;
1014
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1016 io_req->offsets[0] = 0;
1017 io_req->offsets[1] = dev->cow.data_offset;
1018 io_req->buffer = req->buffer;
1019 io_req->sectorsize = 1 << 9;
1020
1021 if(dev->cow.file != NULL)
1022 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1023 dev->cow.bitmap_len);
1024
1025 return(0);
1026}
1027
1028/* Called with ubd_io_lock held */
1029static void do_ubd_request(request_queue_t *q)
1030{
1031 struct io_thread_req io_req;
1032 struct request *req;
1033 int err, n;
1034
1035 if(thread_fd == -1){
1036 while((req = elv_next_request(q)) != NULL){
1037 err = prepare_request(req, &io_req);
1038 if(!err){
1039 do_io(&io_req);
1040 __ubd_finish(req, io_req.error);
1041 }
1042 }
1043 }
1044 else {
1045 if(do_ubd || (req = elv_next_request(q)) == NULL)
1046 return;
1047 err = prepare_request(req, &io_req);
1048 if(!err){
1049 do_ubd = ubd_handler;
1050 n = os_write_file(thread_fd, (char *) &io_req,
1051 sizeof(io_req));
1052 if(n != sizeof(io_req))
1053 printk("write to io thread failed, "
1054 "errno = %d\n", -n);
1055 }
1056 }
1057}
1058
1059static int ubd_ioctl(struct inode * inode, struct file * file,
1060 unsigned int cmd, unsigned long arg)
1061{
1062 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1063 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1064 struct hd_driveid ubd_id = {
1065 .cyls = 0,
1066 .heads = 128,
1067 .sectors = 32,
1068 };
1069
1070 switch (cmd) {
1071 struct hd_geometry g;
1072 struct cdrom_volctrl volume;
1073 case HDIO_GETGEO:
1074 if(!loc) return(-EINVAL);
1075 g.heads = 128;
1076 g.sectors = 32;
1077 g.cylinders = dev->size / (128 * 32 * 512);
1078 g.start = get_start_sect(inode->i_bdev);
1079 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1080
1081 case HDIO_GET_IDENTITY:
1082 ubd_id.cyls = dev->size / (128 * 32 * 512);
1083 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1084 sizeof(ubd_id)))
1085 return(-EFAULT);
1086 return(0);
1087
1088 case CDROMVOLREAD:
1089 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1090 return(-EFAULT);
1091 volume.channel0 = 255;
1092 volume.channel1 = 255;
1093 volume.channel2 = 255;
1094 volume.channel3 = 255;
1095 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1096 return(-EFAULT);
1097 return(0);
1098 }
1099 return(-EINVAL);
1100}
1101
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1103{
1104 struct uml_stat buf1, buf2;
1105 int err;
1106
1107 if(from_cmdline == NULL) return(1);
1108 if(!strcmp(from_cmdline, from_cow)) return(1);
1109
1110 err = os_stat_file(from_cmdline, &buf1);
1111 if(err < 0){
1112 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1113 return(1);
1114 }
1115 err = os_stat_file(from_cow, &buf2);
1116 if(err < 0){
1117 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1118 return(1);
1119 }
1120 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1121 return(1);
1122
1123 printk("Backing file mismatch - \"%s\" requested,\n"
1124 "\"%s\" specified in COW header of \"%s\"\n",
1125 from_cmdline, from_cow, cow);
1126 return(0);
1127}
1128
1129static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1130{
1131 unsigned long modtime;
1132 long long actual;
1133 int err;
1134
1135 err = os_file_modtime(file, &modtime);
1136 if(err < 0){
1137 printk("Failed to get modification time of backing file "
1138 "\"%s\", err = %d\n", file, -err);
1139 return(err);
1140 }
1141
1142 err = os_file_size(file, &actual);
1143 if(err < 0){
1144 printk("Failed to get size of backing file \"%s\", "
1145 "err = %d\n", file, -err);
1146 return(err);
1147 }
1148
1149 if(actual != size){
1150 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1151 * the typecast.*/
1152 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1153 "file\n", (unsigned long long) size, actual);
1154 return(-EINVAL);
1155 }
1156 if(modtime != mtime){
1157 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1158 "file\n", mtime, modtime);
1159 return(-EINVAL);
1160 }
1161 return(0);
1162}
1163
/*
 * Seek to 'offset' in 'fd' and read 'len' bytes into 'buf'.
 *
 * Returns 0 on success or the negative error from os_seek_file() or
 * os_read_file().  Any non-negative result from os_read_file() is
 * reported as success, so the caller is not told about a short read.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	/* Only attempt the read once the seek has succeeded */
	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1178
1179int open_ubd_file(char *file, struct openflags *openflags,
1180 char **backing_file_out, int *bitmap_offset_out,
1181 unsigned long *bitmap_len_out, int *data_offset_out,
1182 int *create_cow_out)
1183{
1184 time_t mtime;
1185 unsigned long long size;
1186 __u32 version, align;
1187 char *backing_file;
1188 int fd, err, sectorsize, same, mode = 0644;
1189
1190 fd = os_open_file(file, *openflags, mode);
1191 if(fd < 0){
1192 if((fd == -ENOENT) && (create_cow_out != NULL))
1193 *create_cow_out = 1;
1194 if(!openflags->w ||
1195 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1196 openflags->w = 0;
1197 fd = os_open_file(file, *openflags, mode);
1198 if(fd < 0)
1199 return(fd);
1200 }
1201
1202 err = os_lock_file(fd, openflags->w);
1203 if(err < 0){
1204 printk("Failed to lock '%s', err = %d\n", file, -err);
1205 goto out_close;
1206 }
1207
1208 if(backing_file_out == NULL) return(fd);
1209
1210 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1211 &size, &sectorsize, &align, bitmap_offset_out);
1212 if(err && (*backing_file_out != NULL)){
1213 printk("Failed to read COW header from COW file \"%s\", "
1214 "errno = %d\n", file, -err);
1215 goto out_close;
1216 }
1217 if(err) return(fd);
1218
1219 if(backing_file_out == NULL) return(fd);
1220
1221 same = same_backing_files(*backing_file_out, backing_file, file);
1222
1223 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1224 printk("Switching backing file to '%s'\n", *backing_file_out);
1225 err = write_cow_header(file, fd, *backing_file_out,
1226 sectorsize, align, &size);
1227 if(err){
1228 printk("Switch failed, errno = %d\n", -err);
1229 return(err);
1230 }
1231 }
1232 else {
1233 *backing_file_out = backing_file;
1234 err = backing_file_mismatch(*backing_file_out, size, mtime);
1235 if(err) goto out_close;
1236 }
1237
1238 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1239 bitmap_len_out, data_offset_out);
1240
1241 return(fd);
1242 out_close:
1243 os_close_file(fd);
1244 return(err);
1245}
1246
1247int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1248 int sectorsize, int alignment, int *bitmap_offset_out,
1249 unsigned long *bitmap_len_out, int *data_offset_out)
1250{
1251 int err, fd;
1252
1253 flags.c = 1;
1254 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1255 if(fd < 0){
1256 err = fd;
1257 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1258 -err);
1259 goto out;
1260 }
1261
1262 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1263 bitmap_offset_out, bitmap_len_out,
1264 data_offset_out);
1265 if(!err)
1266 return(fd);
1267 os_close_file(fd);
1268 out:
1269 return(err);
1270}
1271
1272static int update_bitmap(struct io_thread_req *req)
1273{
1274 int n;
1275
1276 if(req->cow_offset == -1)
1277 return(0);
1278
1279 n = os_seek_file(req->fds[1], req->cow_offset);
1280 if(n < 0){
1281 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1282 return(1);
1283 }
1284
1285 n = os_write_file(req->fds[1], &req->bitmap_words,
1286 sizeof(req->bitmap_words));
1287 if(n != sizeof(req->bitmap_words)){
1288 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1289 req->fds[1]);
1290 return(1);
1291 }
1292
1293 return(0);
1294}
1295
/*
 * Carry out one I/O request.
 *
 * The request covers req->length bytes, handled in units of
 * req->sectorsize.  Each sector's bit in req->sector_mask selects which
 * of the two descriptors (req->fds[bit]) and base offsets
 * (req->offsets[bit]) it uses.  Consecutive sectors with the same bit
 * value are grouped into a run and transferred together.
 *
 * On any seek, read or write failure req->error is set to 1 and the
 * function returns at once, without calling update_bitmap().  Otherwise
 * req->error receives the result of update_bitmap().
 */
void do_io(struct io_thread_req *req)
{
	char *buf;
	unsigned long len;
	int n, nsectors, start, end, bit;
	int err;
	__u64 off;

	nsectors = req->length / req->sectorsize;
	start = 0;
	do {
		/* Find the run [start, end) of sectors sharing one bit value */
		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
		end = start;
		while((end < nsectors) &&
		      (ubd_test_bit(end, (unsigned char *)
				    &req->sector_mask) == bit))
			end++;

		/* File position, byte count and buffer slice for this run */
		off = req->offset + req->offsets[bit] +
			start * req->sectorsize;
		len = (end - start) * req->sectorsize;
		buf = &req->buffer[start * req->sectorsize];

		err = os_seek_file(req->fds[bit], off);
		if(err < 0){
			printk("do_io - lseek failed : err = %d\n", -err);
			req->error = 1;
			return;
		}
		if(req->op == UBD_READ){
			/* Keep reading until the run is complete or a read
			 * returns 0.  buf and len are advanced by the
			 * previous read's count at the top of each pass, so
			 * after the loop they still describe the last read
			 * attempt and n is how much of it was filled.
			 */
			n = 0;
			do {
				buf = &buf[n];
				len -= n;
				n = os_read_file(req->fds[bit], buf, len);
				if (n < 0) {
					printk("do_io - read failed, err = %d "
					       "fd = %d\n", -n, req->fds[bit]);
					req->error = 1;
					return;
				}
			} while((n < len) && (n != 0));
			/* Zero-fill whatever the file could not supply */
			if (n < len) memset(&buf[n], 0, len - n);
		} else {
			/* A single write must transfer the whole run */
			n = os_write_file(req->fds[bit], buf, len);
			if(n != len){
				printk("do_io - write failed err = %d "
				       "fd = %d\n", -n, req->fds[bit]);
				req->error = 1;
				return;
			}
		}

		start = end;
	} while(start < nsectors);

	req->error = update_bitmap(req);
}
1354
/* File descriptor the io thread reads requests from and writes the
 * processed requests back to (see io_thread).
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Number of complete requests the io thread has read; incremented once
 * per request in io_thread.  Only changed by the io thread.
 */
int io_count = 0;
1362
1363int io_thread(void *arg)
1364{
1365 struct io_thread_req req;
1366 int n;
1367
1368 ignore_sigwinch_sig();
1369 while(1){
1370 n = os_read_file(kernel_fd, &req, sizeof(req));
1371 if(n != sizeof(req)){
1372 if(n < 0)
1373 printk("io_thread - read failed, fd = %d, "
1374 "err = %d\n", kernel_fd, -n);
1375 else {
1376 printk("io_thread - short read, fd = %d, "
1377 "length = %d\n", kernel_fd, n);
1378 }
1379 continue;
1380 }
1381 io_count++;
1382 do_io(&req);
1383 n = os_write_file(kernel_fd, &req, sizeof(req));
1384 if(n != sizeof(req))
1385 printk("io_thread - write failed, fd = %d, err = %d\n",
1386 kernel_fd, -n);
1387 }
1388}
1389
1390/*
1391 * Overrides for Emacs so that we follow Linus's tabbing style.
1392 * Emacs will notice this stuff at the end of the file and automatically
1393 * adjust the settings for this buffer only. This must remain at the end
1394 * of the file.
1395 * ---------------------------------------------------------------------------
1396 * Local variables:
1397 * c-file-style: "linux"
1398 * End:
1399 */