blob: c3e205b6a4c9d5b05522cfbe1bc04f894ccf3fc9 [file] [log] [blame]
Jeff Dike6c29256c2006-03-27 01:14:37 -08001/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
Russell Kingd052d1b2005-10-29 19:07:23 +010037#include "linux/platform_device.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
/* Direction of a request handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * Description of one I/O operation.  prepare_request() fills it in and
 * ubd_handler() reads a completed copy back from thread_fd.  The field
 * order and types are deliberately left as they were.
 */
struct io_thread_req {
	enum ubd_req op;		/* UBD_READ or UBD_WRITE */
	int fds[2];			/* [0]: cow.fd when a backing file is
					 * configured, else dev->fd;
					 * [1]: dev->fd */
	unsigned long offsets[2];	/* [0] is 0, [1] is cow.data_offset */
	unsigned long long offset;	/* request sector << 9 */
	unsigned long length;		/* current_nr_sectors << 9 */
	char *buffer;			/* the request's data buffer */
	int sectorsize;			/* set to 1 << 9 */
	unsigned long sector_mask;	/* one bit per sector of the request,
					 * filled in by cowify_req() */
	unsigned long long cow_offset;	/* -1 unless cowify_bitmap() changed
					 * the bitmap */
	unsigned long bitmap_words[2];	/* bitmap words copied by
					 * cowify_bitmap() */
	int error;			/* cleared on submit, checked by
					 * ubd_handler() */
};
72
/*
 * Helpers whose definitions are not part of this section of the file.
 */
extern int open_ubd_file(char *file, struct openflags *openflags,
			 int shared, char **backing_file_out,
			 int *bitmap_offset_out,
			 unsigned long *bitmap_len_out,
			 int *data_offset_out, int *create_cow_out);

extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
/*
 * Test bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0].  Returns 1 when the bit is set, else 0.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / per_elem;
	int shift = bit % per_elem;

	return (data[index] & (1 << shift)) != 0;
}
95
/*
 * Set bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0]; other bits are left alone.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / per_elem;
	int shift = bit % per_elem;

	data[index] |= (1 << shift);
}
106/*End stuff from ubd_user.h*/
107
108#define DRIVER_NAME "uml-blkdev"
109
110static DEFINE_SPINLOCK(ubd_io_lock);
111static DEFINE_SPINLOCK(ubd_lock);
112
Jeff Dike91acb212005-10-10 23:10:32 -0400113static void (*do_ubd)(void);
114
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115static int ubd_open(struct inode * inode, struct file * filp);
116static int ubd_release(struct inode * inode, struct file * file);
117static int ubd_ioctl(struct inode * inode, struct file * file,
118 unsigned int cmd, unsigned long arg);
Christoph Hellwiga885c8c2006-01-08 01:02:50 -0800119static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
121#define MAX_DEV (8)
122
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static struct block_device_operations ubd_blops = {
124 .owner = THIS_MODULE,
125 .open = ubd_open,
126 .release = ubd_release,
127 .ioctl = ubd_ioctl,
Christoph Hellwiga885c8c2006-01-08 01:02:50 -0800128 .getgeo = ubd_getgeo,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129};
130
131/* Protected by the queue_lock */
132static request_queue_t *ubd_queue;
133
134/* Protected by ubd_lock */
135static int fake_major = MAJOR_NR;
136
137static struct gendisk *ubd_gendisk[MAX_DEV];
138static struct gendisk *fake_gendisk[MAX_DEV];
Jeff Dike6c29256c2006-03-27 01:14:37 -0800139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140#ifdef CONFIG_BLK_DEV_UBD_SYNC
141#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
142 .cl = 1 })
143#else
144#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
145 .cl = 1 })
146#endif
147
148/* Not protected - changed only in ubd_setup_common and then only to
149 * to enable O_SYNC.
150 */
151static struct openflags global_openflags = OPEN_FLAGS;
152
/* Per-device copy-on-write state. */
struct cow {
	/* This is the backing file, actually */
	char *file;
	int fd;				/* descriptor of the backing file */
	unsigned long *bitmap;		/* vmalloc'ed in ubd_open_dev(),
					 * vfree'd in ubd_close() */
	unsigned long bitmap_len;	/* size passed to vmalloc() */
	int bitmap_offset;		/* offset given to read_cow_bitmap() */
	int data_offset;		/* becomes io_thread_req.offsets[1] */
};
162
163struct ubd {
164 char *file;
165 int count;
166 int fd;
167 __u64 size;
168 struct openflags boot_openflags;
169 struct openflags openflags;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800170 int shared;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 int no_cow;
172 struct cow cow;
173 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174};
175
176#define DEFAULT_COW { \
177 .file = NULL, \
178 .fd = -1, \
179 .bitmap = NULL, \
180 .bitmap_offset = 0, \
181 .data_offset = 0, \
182}
183
184#define DEFAULT_UBD { \
185 .file = NULL, \
186 .count = 0, \
187 .fd = -1, \
188 .size = -1, \
189 .boot_openflags = OPEN_FLAGS, \
190 .openflags = OPEN_FLAGS, \
191 .no_cow = 0, \
Jeff Dike6c29256c2006-03-27 01:14:37 -0800192 .shared = 0, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194}
195
196struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
197
198static int ubd0_init(void)
199{
200 struct ubd *dev = &ubd_dev[0];
201
202 if(dev->file == NULL)
203 dev->file = "root_fs";
204 return(0);
205}
206
207__initcall(ubd0_init);
208
209/* Only changed by fake_ide_setup which is a setup */
210static int fake_ide = 0;
211static struct proc_dir_entry *proc_ide_root = NULL;
212static struct proc_dir_entry *proc_ide = NULL;
213
214static void make_proc_ide(void)
215{
216 proc_ide_root = proc_mkdir("ide", NULL);
217 proc_ide = proc_mkdir("ide0", proc_ide_root);
218}
219
/*
 * read_proc handler for the "media" entry: the content is always "disk\n".
 *
 * The text is copied to 'page'.  When fewer than 'count' bytes remain past
 * 'off', *eof is set; if nothing remains, 0 is returned and *start is left
 * untouched.  Otherwise *start is pointed at page + off and the number of
 * bytes available (at most 'count') is returned.  'data' is unused.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
236
237static void make_ide_entries(char *dev_name)
238{
239 struct proc_dir_entry *dir, *ent;
240 char name[64];
241
242 if(proc_ide_root == NULL) make_proc_ide();
243
244 dir = proc_mkdir(dev_name, proc_ide);
245 if(!dir) return;
246
247 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
248 if(!ent) return;
249 ent->nlink = 1;
250 ent->data = NULL;
251 ent->read_proc = proc_ide_read_media;
252 ent->write_proc = NULL;
253 sprintf(name,"ide0/%s", dev_name);
254 proc_symlink(dev_name, proc_ide_root, name);
255}
256
257static int fake_ide_setup(char *str)
258{
259 fake_ide = 1;
260 return(1);
261}
262
263__setup("fake_ide", fake_ide_setup);
264
265__uml_help(fake_ide_setup,
266"fake_ide\n"
267" Create ide0 entries that map onto ubd devices.\n\n"
268);
269
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'h' meaning 0..7.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned; otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'h'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
288
289static int ubd_setup_common(char *str, int *index_out)
290{
291 struct ubd *dev;
292 struct openflags flags = global_openflags;
293 char *backing_file;
294 int n, err, i;
295
296 if(index_out) *index_out = -1;
297 n = *str;
298 if(n == '='){
299 char *end;
300 int major;
301
302 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 if(!strcmp(str, "sync")){
304 global_openflags = of_sync(global_openflags);
305 return(0);
306 }
307 major = simple_strtoul(str, &end, 0);
308 if((*end != '\0') || (end == str)){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800309 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310 "ubd_setup : didn't parse major number\n");
311 return(1);
312 }
313
314 err = 1;
315 spin_lock(&ubd_lock);
316 if(fake_major != MAJOR_NR){
317 printk(KERN_ERR "Can't assign a fake major twice\n");
318 goto out1;
319 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 fake_major = major;
322
323 printk(KERN_INFO "Setting extra ubd major number to %d\n",
324 major);
325 err = 0;
326 out1:
327 spin_unlock(&ubd_lock);
328 return(err);
329 }
330
331 n = parse_unit(&str);
332 if(n < 0){
333 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
334 "'%s'\n", str);
335 return(1);
336 }
337 if(n >= MAX_DEV){
338 printk(KERN_ERR "ubd_setup : index %d out of range "
339 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
340 return(1);
341 }
342
343 err = 1;
344 spin_lock(&ubd_lock);
345
346 dev = &ubd_dev[n];
347 if(dev->file != NULL){
348 printk(KERN_ERR "ubd_setup : device already configured\n");
349 goto out;
350 }
351
352 if (index_out)
353 *index_out = n;
354
Jeff Dike6c29256c2006-03-27 01:14:37 -0800355 for (i = 0; i < sizeof("rscd="); i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 switch (*str) {
357 case 'r':
358 flags.w = 0;
359 break;
360 case 's':
361 flags.s = 1;
362 break;
363 case 'd':
364 dev->no_cow = 1;
365 break;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800366 case 'c':
367 dev->shared = 1;
368 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 case '=':
370 str++;
371 goto break_loop;
372 default:
Jeff Dike6c29256c2006-03-27 01:14:37 -0800373 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 goto out;
375 }
376 str++;
377 }
378
379 if (*str == '=')
380 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
381 else
382 printk(KERN_ERR "ubd_setup : Expected '='\n");
383 goto out;
384
385break_loop:
386 err = 0;
387 backing_file = strchr(str, ',');
388
389 if (!backing_file) {
390 backing_file = strchr(str, ':');
391 }
392
393 if(backing_file){
394 if(dev->no_cow)
395 printk(KERN_ERR "Can't specify both 'd' and a "
396 "cow file\n");
397 else {
398 *backing_file = '\0';
399 backing_file++;
400 }
401 }
402 dev->file = str;
403 dev->cow.file = backing_file;
404 dev->boot_openflags = flags;
405out:
406 spin_unlock(&ubd_lock);
407 return(err);
408}
409
410static int ubd_setup(char *str)
411{
412 ubd_setup_common(str, NULL);
413 return(1);
414}
415
416__setup("ubd", ubd_setup);
417__uml_help(ubd_setup,
418"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
419" This is used to associate a device with a file in the underlying\n"
420" filesystem. When specifying two filenames, the first one is the\n"
421" COW name and the second is the backing file name. As separator you can\n"
422" use either a ':' or a ',': the first one allows writing things like;\n"
423" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
424" while with a ',' the shell would not expand the 2nd '~'.\n"
425" When using only one filename, UML will detect whether to thread it like\n"
426" a COW file or a backing file. To override this detection, add the 'd'\n"
427" flag:\n"
428" ubd0d=BackingFile\n"
429" Usually, there is a filesystem in the file, but \n"
430" that's not required. Swap devices containing swap files can be\n"
431" specified like this. Also, a file which doesn't contain a\n"
432" filesystem can have its contents read in the virtual \n"
433" machine by running 'dd' on the device. <n> must be in the range\n"
434" 0 to 7. Appending an 'r' to the number will cause that device\n"
435" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
436" an 's' will cause data to be written to disk on the host immediately.\n\n"
437);
438
439static int udb_setup(char *str)
440{
441 printk("udb%s specified on command line is almost certainly a ubd -> "
442 "udb TYPO\n", str);
443 return(1);
444}
445
446__setup("udb", udb_setup);
447__uml_help(udb_setup,
448"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700449" This option is here solely to catch ubd -> udb typos, which can be\n"
450" to impossible to catch visually unless you specifically look for\n"
451" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452" in the boot output.\n\n"
453);
454
455static int fakehd_set = 0;
456static int fakehd(char *str)
457{
458 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
459 fakehd_set = 1;
460 return 1;
461}
462
463__setup("fakehd", fakehd);
464__uml_help(fakehd,
465"fakehd\n"
466" Change the ubd device name to \"hd\".\n\n"
467);
468
469static void do_ubd_request(request_queue_t * q);
Jeff Dike91acb212005-10-10 23:10:32 -0400470
471/* Only changed by ubd_init, which is an initcall. */
472int thread_fd = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
474/* Changed by ubd_handler, which is serialized because interrupts only
475 * happen on CPU 0.
476 */
477int intr_count = 0;
478
479/* call ubd_finish if you need to serialize */
Jeff Dike91acb212005-10-10 23:10:32 -0400480static void __ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481{
Jeff Dike91acb212005-10-10 23:10:32 -0400482 int nsect;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483
Jeff Dike91acb212005-10-10 23:10:32 -0400484 if(error){
485 end_request(req, 0);
486 return;
487 }
488 nsect = req->current_nr_sectors;
489 req->sector += nsect;
490 req->buffer += nsect << 9;
491 req->errors = 0;
492 req->nr_sectors -= nsect;
493 req->current_nr_sectors = 0;
494 end_request(req, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495}
496
Jeff Dike91acb212005-10-10 23:10:32 -0400497static inline void ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498{
Jeff Dike91acb212005-10-10 23:10:32 -0400499 spin_lock(&ubd_io_lock);
500 __ubd_finish(req, error);
501 spin_unlock(&ubd_io_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502}
503
Jeff Dike91acb212005-10-10 23:10:32 -0400504/* Called without ubd_io_lock held */
505static void ubd_handler(void)
506{
507 struct io_thread_req req;
508 struct request *rq = elv_next_request(ubd_queue);
509 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510
Jeff Dike91acb212005-10-10 23:10:32 -0400511 do_ubd = NULL;
512 intr_count++;
513 n = os_read_file(thread_fd, &req, sizeof(req));
514 if(n != sizeof(req)){
515 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
516 "err = %d\n", os_getpid(), -n);
517 spin_lock(&ubd_io_lock);
518 end_request(rq, 0);
519 spin_unlock(&ubd_io_lock);
520 return;
521 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800522
Jeff Dike91acb212005-10-10 23:10:32 -0400523 ubd_finish(rq, req.error);
524 reactivate_fd(thread_fd, UBD_IRQ);
525 do_ubd_request(ubd_queue);
526}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
528static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
529{
Jeff Dike91acb212005-10-10 23:10:32 -0400530 ubd_handler();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 return(IRQ_HANDLED);
532}
533
/* Set by ubd_driver_init, which is an initcall; -1 means no I/O thread. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
544
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545static int ubd_file_size(struct ubd *dev, __u64 *size_out)
546{
547 char *file;
548
549 file = dev->cow.file ? dev->cow.file : dev->file;
550 return(os_file_size(file, size_out));
551}
552
553static void ubd_close(struct ubd *dev)
554{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 os_close_file(dev->fd);
556 if(dev->cow.file == NULL)
557 return;
558
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 os_close_file(dev->cow.fd);
560 vfree(dev->cow.bitmap);
561 dev->cow.bitmap = NULL;
562}
563
564static int ubd_open_dev(struct ubd *dev)
565{
566 struct openflags flags;
567 char **back_ptr;
568 int err, create_cow, *create_ptr;
569
570 dev->openflags = dev->boot_openflags;
571 create_cow = 0;
572 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
573 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800574 dev->fd = open_ubd_file(dev->file, &dev->openflags, dev->shared,
575 back_ptr, &dev->cow.bitmap_offset,
576 &dev->cow.bitmap_len, &dev->cow.data_offset,
577 create_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578
579 if((dev->fd == -ENOENT) && create_cow){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800580 dev->fd = create_cow_file(dev->file, dev->cow.file,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 dev->openflags, 1 << 9, PAGE_SIZE,
Jeff Dike6c29256c2006-03-27 01:14:37 -0800582 &dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 &dev->cow.bitmap_len,
584 &dev->cow.data_offset);
585 if(dev->fd >= 0){
586 printk(KERN_INFO "Creating \"%s\" as COW file for "
587 "\"%s\"\n", dev->file, dev->cow.file);
588 }
589 }
590
591 if(dev->fd < 0){
592 printk("Failed to open '%s', errno = %d\n", dev->file,
593 -dev->fd);
594 return(dev->fd);
595 }
596
597 if(dev->cow.file != NULL){
598 err = -ENOMEM;
599 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
600 if(dev->cow.bitmap == NULL){
601 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
602 goto error;
603 }
604 flush_tlb_kernel_vm();
605
Jeff Dike6c29256c2006-03-27 01:14:37 -0800606 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
607 dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 dev->cow.bitmap_len);
609 if(err < 0)
610 goto error;
611
612 flags = dev->openflags;
613 flags.w = 0;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800614 err = open_ubd_file(dev->cow.file, &flags, dev->shared, NULL,
615 NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 if(err < 0) goto error;
617 dev->cow.fd = err;
618 }
619 return(0);
620 error:
621 os_close_file(dev->fd);
622 return(err);
623}
624
625static int ubd_new_disk(int major, u64 size, int unit,
626 struct gendisk **disk_out)
627
628{
629 struct gendisk *disk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 int err;
631
632 disk = alloc_disk(1 << UBD_SHIFT);
633 if(disk == NULL)
634 return(-ENOMEM);
635
636 disk->major = major;
637 disk->first_minor = unit << UBD_SHIFT;
638 disk->fops = &ubd_blops;
639 set_capacity(disk, size / 512);
Greg Kroah-Hartmance7b0f42005-06-20 21:15:16 -0700640 if(major == MAJOR_NR)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
Greg Kroah-Hartmance7b0f42005-06-20 21:15:16 -0700642 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 sprintf(disk->disk_name, "ubd_fake%d", unit);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
645 /* sysfs register (not for ide fake devices) */
646 if (major == MAJOR_NR) {
647 ubd_dev[unit].pdev.id = unit;
648 ubd_dev[unit].pdev.name = DRIVER_NAME;
649 platform_device_register(&ubd_dev[unit].pdev);
650 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
651 }
652
653 disk->private_data = &ubd_dev[unit];
654 disk->queue = ubd_queue;
655 add_disk(disk);
656
657 *disk_out = disk;
658 return 0;
659}
660
/*
 * Round n up to the next multiple of 512.  The argument is parenthesized
 * so that expressions can be passed, and the mask is built without
 * left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1ULL << 9) - 1))
662
663static int ubd_add(int n)
664{
665 struct ubd *dev = &ubd_dev[n];
666 int err;
667
Jeff Dikeec7cf782005-09-03 15:57:29 -0700668 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 if(dev->file == NULL)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700670 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
672 if (ubd_open_dev(dev))
Jeff Dikeec7cf782005-09-03 15:57:29 -0700673 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
675 err = ubd_file_size(dev, &dev->size);
676 if(err < 0)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700677 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
679 dev->size = ROUND_BLOCK(dev->size);
680
681 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800682 if(err)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700683 goto out_close;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800684
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 if(fake_major != MAJOR_NR)
Jeff Dike6c29256c2006-03-27 01:14:37 -0800686 ubd_new_disk(fake_major, dev->size, n,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 &fake_gendisk[n]);
688
689 /* perhaps this should also be under the "if (fake_major)" above */
690 /* using the fake_disk->disk_name and also the fakehd_set name */
691 if (fake_ide)
692 make_ide_entries(ubd_gendisk[n]->disk_name);
693
Jeff Dikeec7cf782005-09-03 15:57:29 -0700694 err = 0;
695out_close:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 ubd_close(dev);
Jeff Dikeec7cf782005-09-03 15:57:29 -0700697out:
698 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699}
700
701static int ubd_config(char *str)
702{
703 int n, err;
704
Jeff Dike970d6e32006-01-06 00:18:48 -0800705 str = kstrdup(str, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 if(str == NULL){
707 printk(KERN_ERR "ubd_config failed to strdup string\n");
708 return(1);
709 }
710 err = ubd_setup_common(str, &n);
711 if(err){
712 kfree(str);
713 return(-1);
714 }
715 if(n == -1) return(0);
716
717 spin_lock(&ubd_lock);
718 err = ubd_add(n);
719 if(err)
720 ubd_dev[n].file = NULL;
721 spin_unlock(&ubd_lock);
722
723 return(err);
724}
725
726static int ubd_get_config(char *name, char *str, int size, char **error_out)
727{
728 struct ubd *dev;
729 int n, len = 0;
730
731 n = parse_unit(&name);
732 if((n >= MAX_DEV) || (n < 0)){
733 *error_out = "ubd_get_config : device number out of range";
734 return(-1);
735 }
736
737 dev = &ubd_dev[n];
738 spin_lock(&ubd_lock);
739
740 if(dev->file == NULL){
741 CONFIG_CHUNK(str, size, len, "", 1);
742 goto out;
743 }
744
745 CONFIG_CHUNK(str, size, len, dev->file, 0);
746
747 if(dev->cow.file != NULL){
748 CONFIG_CHUNK(str, size, len, ",", 0);
749 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
750 }
751 else CONFIG_CHUNK(str, size, len, "", 1);
752
753 out:
754 spin_unlock(&ubd_lock);
755 return(len);
756}
757
Jeff Dike29d56cf2005-06-25 14:55:25 -0700758static int ubd_id(char **str, int *start_out, int *end_out)
759{
760 int n;
761
762 n = parse_unit(str);
763 *start_out = 0;
764 *end_out = MAX_DEV - 1;
765 return n;
766}
767
768static int ubd_remove(int n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769{
770 struct ubd *dev;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700771 int err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772
Jeff Dike29d56cf2005-06-25 14:55:25 -0700773 spin_lock(&ubd_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774
775 if(ubd_gendisk[n] == NULL)
776 goto out;
777
Jeff Dike29d56cf2005-06-25 14:55:25 -0700778 dev = &ubd_dev[n];
779
780 if(dev->file == NULL)
781 goto out;
782
783 /* you cannot remove a open disk */
784 err = -EBUSY;
785 if(dev->count > 0)
786 goto out;
787
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 del_gendisk(ubd_gendisk[n]);
789 put_disk(ubd_gendisk[n]);
790 ubd_gendisk[n] = NULL;
791
792 if(fake_gendisk[n] != NULL){
793 del_gendisk(fake_gendisk[n]);
794 put_disk(fake_gendisk[n]);
795 fake_gendisk[n] = NULL;
796 }
797
798 platform_device_unregister(&dev->pdev);
799 *dev = ((struct ubd) DEFAULT_UBD);
800 err = 0;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700801out:
802 spin_unlock(&ubd_lock);
803 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804}
805
806static struct mc_device ubd_mc = {
807 .name = "ubd",
808 .config = ubd_config,
809 .get_config = ubd_get_config,
Jeff Dike29d56cf2005-06-25 14:55:25 -0700810 .id = ubd_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 .remove = ubd_remove,
812};
813
814static int ubd_mc_init(void)
815{
816 mconsole_register_dev(&ubd_mc);
817 return 0;
818}
819
820__initcall(ubd_mc_init);
821
Russell King3ae5eae2005-11-09 22:32:44 +0000822static struct platform_driver ubd_driver = {
823 .driver = {
824 .name = DRIVER_NAME,
825 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826};
827
828int ubd_init(void)
829{
830 int i;
831
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 if (register_blkdev(MAJOR_NR, "ubd"))
833 return -1;
834
835 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
836 if (!ubd_queue) {
837 unregister_blkdev(MAJOR_NR, "ubd");
838 return -1;
839 }
840
841 if (fake_major != MAJOR_NR) {
842 char name[sizeof("ubd_nnn\0")];
843
844 snprintf(name, sizeof(name), "ubd_%d", fake_major);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 if (register_blkdev(fake_major, "ubd"))
846 return -1;
847 }
Russell King3ae5eae2005-11-09 22:32:44 +0000848 platform_driver_register(&ubd_driver);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800849 for (i = 0; i < MAX_DEV; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 ubd_add(i);
851 return 0;
852}
853
854late_initcall(ubd_init);
855
Jeff Dike91acb212005-10-10 23:10:32 -0400856int ubd_driver_init(void){
857 unsigned long stack;
858 int err;
859
860 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
861 if(global_openflags.s){
862 printk(KERN_INFO "ubd: Synchronous mode\n");
863 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
864 * enough. So use anyway the io thread. */
865 }
866 stack = alloc_stack(0, 0);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800867 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
Jeff Dike91acb212005-10-10 23:10:32 -0400868 &thread_fd);
869 if(io_pid < 0){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800870 printk(KERN_ERR
Jeff Dike91acb212005-10-10 23:10:32 -0400871 "ubd : Failed to start I/O thread (errno = %d) - "
872 "falling back to synchronous I/O\n", -io_pid);
873 io_pid = -1;
874 return(0);
875 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800876 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
Jeff Dike91acb212005-10-10 23:10:32 -0400877 SA_INTERRUPT, "ubd", ubd_dev);
878 if(err != 0)
879 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
Jeff Dikef4c57a72006-03-31 02:30:10 -0800880 return 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400881}
882
883device_initcall(ubd_driver_init);
884
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885static int ubd_open(struct inode *inode, struct file *filp)
886{
887 struct gendisk *disk = inode->i_bdev->bd_disk;
888 struct ubd *dev = disk->private_data;
889 int err = 0;
890
891 if(dev->count == 0){
892 err = ubd_open_dev(dev);
893 if(err){
894 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
895 disk->disk_name, dev->file, -err);
896 goto out;
897 }
898 }
899 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700900 set_disk_ro(disk, !dev->openflags.w);
901
902 /* This should no more be needed. And it didn't work anyway to exclude
903 * read-write remounting of filesystems.*/
904 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if(--dev->count == 0) ubd_close(dev);
906 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700907 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 out:
909 return(err);
910}
911
912static int ubd_release(struct inode * inode, struct file * file)
913{
914 struct gendisk *disk = inode->i_bdev->bd_disk;
915 struct ubd *dev = disk->private_data;
916
917 if(--dev->count == 0)
918 ubd_close(dev);
919 return(0);
920}
921
Jeff Dike91acb212005-10-10 23:10:32 -0400922static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
923 __u64 *cow_offset, unsigned long *bitmap,
924 __u64 bitmap_offset, unsigned long *bitmap_words,
925 __u64 bitmap_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926{
Jeff Dike91acb212005-10-10 23:10:32 -0400927 __u64 sector = io_offset >> 9;
928 int i, update_bitmap = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929
Jeff Dike91acb212005-10-10 23:10:32 -0400930 for(i = 0; i < length >> 9; i++){
931 if(cow_mask != NULL)
932 ubd_set_bit(i, (unsigned char *) cow_mask);
933 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
934 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
Jeff Dike91acb212005-10-10 23:10:32 -0400936 update_bitmap = 1;
937 ubd_set_bit(sector + i, (unsigned char *) bitmap);
938 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939
Jeff Dike91acb212005-10-10 23:10:32 -0400940 if(!update_bitmap)
941 return;
942
943 *cow_offset = sector / (sizeof(unsigned long) * 8);
944
945 /* This takes care of the case where we're exactly at the end of the
946 * device, and *cow_offset + 1 is off the end. So, just back it up
947 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
948 * for the original diagnosis.
949 */
950 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
951 sizeof(unsigned long) - 1))
952 (*cow_offset)--;
953
954 bitmap_words[0] = bitmap[*cow_offset];
955 bitmap_words[1] = bitmap[*cow_offset + 1];
956
957 *cow_offset *= sizeof(unsigned long);
958 *cow_offset += bitmap_offset;
959}
960
961static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
962 __u64 bitmap_offset, __u64 bitmap_len)
963{
964 __u64 sector = req->offset >> 9;
965 int i;
966
967 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
968 panic("Operation too long");
969
970 if(req->op == UBD_READ) {
971 for(i = 0; i < req->length >> 9; i++){
972 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
Jeff Dike6c29256c2006-03-27 01:14:37 -0800973 ubd_set_bit(i, (unsigned char *)
Jeff Dike91acb212005-10-10 23:10:32 -0400974 &req->sector_mask);
975 }
976 }
977 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
978 &req->cow_offset, bitmap, bitmap_offset,
979 req->bitmap_words, bitmap_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980}
981
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982/* Called with ubd_io_lock held */
Jeff Dike91acb212005-10-10 23:10:32 -0400983static int prepare_request(struct request *req, struct io_thread_req *io_req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984{
985 struct gendisk *disk = req->rq_disk;
986 struct ubd *dev = disk->private_data;
Jeff Dike91acb212005-10-10 23:10:32 -0400987 __u64 offset;
988 int len;
989
990 if(req->rq_status == RQ_INACTIVE) return(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700992 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800994 printk("Write attempted on readonly ubd device %s\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 disk->disk_name);
Jeff Dike91acb212005-10-10 23:10:32 -0400996 end_request(req, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 return(1);
998 }
999
Jeff Dike91acb212005-10-10 23:10:32 -04001000 offset = ((__u64) req->sector) << 9;
1001 len = req->current_nr_sectors << 9;
1002
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1004 io_req->fds[1] = dev->fd;
Jeff Dike91acb212005-10-10 23:10:32 -04001005 io_req->cow_offset = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 io_req->offset = offset;
1007 io_req->length = len;
1008 io_req->error = 0;
Jeff Dike91acb212005-10-10 23:10:32 -04001009 io_req->sector_mask = 0;
1010
1011 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 io_req->offsets[0] = 0;
1013 io_req->offsets[1] = dev->cow.data_offset;
Jeff Dike91acb212005-10-10 23:10:32 -04001014 io_req->buffer = req->buffer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 io_req->sectorsize = 1 << 9;
1016
Jeff Dike91acb212005-10-10 23:10:32 -04001017 if(dev->cow.file != NULL)
1018 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1019 dev->cow.bitmap_len);
1020
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 return(0);
1022}
1023
1024/* Called with ubd_io_lock held */
1025static void do_ubd_request(request_queue_t *q)
1026{
1027 struct io_thread_req io_req;
1028 struct request *req;
Jeff Dike91acb212005-10-10 23:10:32 -04001029 int err, n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030
Jeff Dike91acb212005-10-10 23:10:32 -04001031 if(thread_fd == -1){
1032 while((req = elv_next_request(q)) != NULL){
1033 err = prepare_request(req, &io_req);
1034 if(!err){
1035 do_io(&io_req);
1036 __ubd_finish(req, io_req.error);
1037 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 }
1039 }
Jeff Dike91acb212005-10-10 23:10:32 -04001040 else {
1041 if(do_ubd || (req = elv_next_request(q)) == NULL)
1042 return;
1043 err = prepare_request(req, &io_req);
1044 if(!err){
1045 do_ubd = ubd_handler;
1046 n = os_write_file(thread_fd, (char *) &io_req,
1047 sizeof(io_req));
1048 if(n != sizeof(io_req))
1049 printk("write to io thread failed, "
1050 "errno = %d\n", -n);
1051 }
1052 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053}
1054
Christoph Hellwiga885c8c2006-01-08 01:02:50 -08001055static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1056{
1057 struct ubd *dev = bdev->bd_disk->private_data;
1058
1059 geo->heads = 128;
1060 geo->sectors = 32;
1061 geo->cylinders = dev->size / (128 * 32 * 512);
1062 return 0;
1063}
1064
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065static int ubd_ioctl(struct inode * inode, struct file * file,
1066 unsigned int cmd, unsigned long arg)
1067{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1069 struct hd_driveid ubd_id = {
1070 .cyls = 0,
1071 .heads = 128,
1072 .sectors = 32,
1073 };
1074
1075 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 struct cdrom_volctrl volume;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 case HDIO_GET_IDENTITY:
1078 ubd_id.cyls = dev->size / (128 * 32 * 512);
1079 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1080 sizeof(ubd_id)))
1081 return(-EFAULT);
1082 return(0);
1083
1084 case CDROMVOLREAD:
1085 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1086 return(-EFAULT);
1087 volume.channel0 = 255;
1088 volume.channel1 = 255;
1089 volume.channel2 = 255;
1090 volume.channel3 = 255;
1091 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1092 return(-EFAULT);
1093 return(0);
1094 }
1095 return(-EINVAL);
1096}
1097
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001098static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099{
1100 struct uml_stat buf1, buf2;
1101 int err;
1102
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001103 if(from_cmdline == NULL)
1104 return 0;
1105 if(!strcmp(from_cmdline, from_cow))
1106 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107
1108 err = os_stat_file(from_cmdline, &buf1);
1109 if(err < 0){
1110 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001111 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 }
1113 err = os_stat_file(from_cow, &buf2);
1114 if(err < 0){
1115 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001116 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 }
1118 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001119 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120
1121 printk("Backing file mismatch - \"%s\" requested,\n"
1122 "\"%s\" specified in COW header of \"%s\"\n",
1123 from_cmdline, from_cow, cow);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001124 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125}
1126
1127static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1128{
1129 unsigned long modtime;
Paolo 'Blaisorblade' Giarrussofe1db502006-02-24 13:03:58 -08001130 unsigned long long actual;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 int err;
1132
1133 err = os_file_modtime(file, &modtime);
1134 if(err < 0){
1135 printk("Failed to get modification time of backing file "
1136 "\"%s\", err = %d\n", file, -err);
1137 return(err);
1138 }
1139
1140 err = os_file_size(file, &actual);
1141 if(err < 0){
1142 printk("Failed to get size of backing file \"%s\", "
1143 "err = %d\n", file, -err);
1144 return(err);
1145 }
1146
1147 if(actual != size){
1148 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1149 * the typecast.*/
1150 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1151 "file\n", (unsigned long long) size, actual);
1152 return(-EINVAL);
1153 }
1154 if(modtime != mtime){
1155 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1156 "file\n", mtime, modtime);
1157 return(-EINVAL);
1158 }
1159 return(0);
1160}
1161
/* Read the COW bitmap from a file into memory.
 *
 * fd     - descriptor to read from
 * buf    - destination, at least len bytes
 * offset - position in the file where the bitmap starts
 * len    - number of bytes to read
 *
 * Returns 0 once all len bytes have been read, or a negative error: the
 * value returned by os_seek_file() or os_read_file() on failure, or -EIO
 * if the file ends before len bytes are available.  Short reads are
 * retried so that the caller never receives a partially filled buffer
 * reported as success.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	char *dest = buf;
	int remaining = len;
	int n;

	n = os_seek_file(fd, offset);
	if (n < 0)
		return n;

	while (remaining > 0) {
		n = os_read_file(fd, dest, remaining);
		if (n < 0)
			return n;
		/* End of file before the whole bitmap was read */
		if (n == 0)
			return -EIO;

		dest += n;
		remaining -= n;
	}

	return 0;
}
1176
Jeff Dike6c29256c2006-03-27 01:14:37 -08001177int open_ubd_file(char *file, struct openflags *openflags, int shared,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 char **backing_file_out, int *bitmap_offset_out,
1179 unsigned long *bitmap_len_out, int *data_offset_out,
1180 int *create_cow_out)
1181{
1182 time_t mtime;
1183 unsigned long long size;
1184 __u32 version, align;
1185 char *backing_file;
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001186 int fd, err, sectorsize, asked_switch, mode = 0644;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
1188 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001189 if (fd < 0) {
1190 if ((fd == -ENOENT) && (create_cow_out != NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 *create_cow_out = 1;
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001192 if (!openflags->w ||
1193 ((fd != -EROFS) && (fd != -EACCES)))
1194 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 openflags->w = 0;
1196 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001197 if (fd < 0)
1198 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 }
1200
Jeff Dike6c29256c2006-03-27 01:14:37 -08001201 if(shared)
1202 printk("Not locking \"%s\" on the host\n", file);
1203 else {
1204 err = os_lock_file(fd, openflags->w);
1205 if(err < 0){
1206 printk("Failed to lock '%s', err = %d\n", file, -err);
1207 goto out_close;
1208 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 }
1210
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001211 /* Succesful return case! */
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001212 if(backing_file_out == NULL)
1213 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214
1215 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1216 &size, &sectorsize, &align, bitmap_offset_out);
1217 if(err && (*backing_file_out != NULL)){
1218 printk("Failed to read COW header from COW file \"%s\", "
1219 "errno = %d\n", file, -err);
1220 goto out_close;
1221 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001222 if(err)
1223 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001225 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001227 /* Allow switching only if no mismatch. */
1228 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 printk("Switching backing file to '%s'\n", *backing_file_out);
1230 err = write_cow_header(file, fd, *backing_file_out,
1231 sectorsize, align, &size);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001232 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 printk("Switch failed, errno = %d\n", -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001234 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001236 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 *backing_file_out = backing_file;
1238 err = backing_file_mismatch(*backing_file_out, size, mtime);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001239 if (err)
1240 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 }
1242
1243 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1244 bitmap_len_out, data_offset_out);
1245
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001246 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 out_close:
1248 os_close_file(fd);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001249 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250}
1251
1252int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1253 int sectorsize, int alignment, int *bitmap_offset_out,
1254 unsigned long *bitmap_len_out, int *data_offset_out)
1255{
1256 int err, fd;
1257
1258 flags.c = 1;
Jeff Dike6c29256c2006-03-27 01:14:37 -08001259 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 if(fd < 0){
1261 err = fd;
1262 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1263 -err);
1264 goto out;
1265 }
1266
1267 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1268 bitmap_offset_out, bitmap_len_out,
1269 data_offset_out);
1270 if(!err)
1271 return(fd);
1272 os_close_file(fd);
1273 out:
1274 return(err);
1275}
1276
Jeff Dike91acb212005-10-10 23:10:32 -04001277static int update_bitmap(struct io_thread_req *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278{
Jeff Dike91acb212005-10-10 23:10:32 -04001279 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280
Jeff Dike91acb212005-10-10 23:10:32 -04001281 if(req->cow_offset == -1)
1282 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
Jeff Dike91acb212005-10-10 23:10:32 -04001284 n = os_seek_file(req->fds[1], req->cow_offset);
1285 if(n < 0){
1286 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1287 return(1);
1288 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Jeff Dike91acb212005-10-10 23:10:32 -04001290 n = os_write_file(req->fds[1], &req->bitmap_words,
1291 sizeof(req->bitmap_words));
1292 if(n != sizeof(req->bitmap_words)){
1293 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1294 req->fds[1]);
1295 return(1);
1296 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297
Jeff Dike91acb212005-10-10 23:10:32 -04001298 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299}
Jeff Dike91acb212005-10-10 23:10:32 -04001300
1301void do_io(struct io_thread_req *req)
1302{
1303 char *buf;
1304 unsigned long len;
1305 int n, nsectors, start, end, bit;
1306 int err;
1307 __u64 off;
1308
1309 nsectors = req->length / req->sectorsize;
1310 start = 0;
1311 do {
1312 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1313 end = start;
1314 while((end < nsectors) &&
1315 (ubd_test_bit(end, (unsigned char *)
1316 &req->sector_mask) == bit))
1317 end++;
1318
1319 off = req->offset + req->offsets[bit] +
1320 start * req->sectorsize;
1321 len = (end - start) * req->sectorsize;
1322 buf = &req->buffer[start * req->sectorsize];
1323
1324 err = os_seek_file(req->fds[bit], off);
1325 if(err < 0){
1326 printk("do_io - lseek failed : err = %d\n", -err);
1327 req->error = 1;
1328 return;
1329 }
1330 if(req->op == UBD_READ){
1331 n = 0;
1332 do {
1333 buf = &buf[n];
1334 len -= n;
1335 n = os_read_file(req->fds[bit], buf, len);
1336 if (n < 0) {
1337 printk("do_io - read failed, err = %d "
1338 "fd = %d\n", -n, req->fds[bit]);
1339 req->error = 1;
1340 return;
1341 }
1342 } while((n < len) && (n != 0));
1343 if (n < len) memset(&buf[n], 0, len - n);
1344 } else {
1345 n = os_write_file(req->fds[bit], buf, len);
1346 if(n != len){
1347 printk("do_io - write failed err = %d "
1348 "fd = %d\n", -n, req->fds[bit]);
1349 req->error = 1;
1350 return;
1351 }
1352 }
1353
1354 start = end;
1355 } while(start < nsectors);
1356
1357 req->error = update_bitmap(req);
1358}
1359
/* Descriptor that io_thread() reads requests from and writes results to;
 * -1 until it has been set up.
 *
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Number of requests io_thread() has received so far.
 * Only changed by the io thread.
 */
int io_count = 0;
1367
1368int io_thread(void *arg)
1369{
1370 struct io_thread_req req;
1371 int n;
1372
1373 ignore_sigwinch_sig();
1374 while(1){
1375 n = os_read_file(kernel_fd, &req, sizeof(req));
1376 if(n != sizeof(req)){
1377 if(n < 0)
1378 printk("io_thread - read failed, fd = %d, "
1379 "err = %d\n", kernel_fd, -n);
1380 else {
1381 printk("io_thread - short read, fd = %d, "
1382 "length = %d\n", kernel_fd, n);
1383 }
1384 continue;
1385 }
1386 io_count++;
1387 do_io(&req);
1388 n = os_write_file(kernel_fd, &req, sizeof(req));
1389 if(n != sizeof(req))
1390 printk("io_thread - write failed, fd = %d, err = %d\n",
1391 kernel_fd, -n);
1392 }
Jeff Dike91acb212005-10-10 23:10:32 -04001393
Jeff Dike1b57e9c2006-01-06 00:18:49 -08001394 return 0;
1395}