blob: e104f59ec519baa31efaba6de12524135c4e29b7 [file] [log] [blame]
Jeff Dike6c29256c2006-03-27 01:14:37 -08001/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include "linux/module.h"
24#include "linux/blkdev.h"
25#include "linux/hdreg.h"
26#include "linux/init.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include "linux/cdrom.h"
28#include "linux/proc_fs.h"
29#include "linux/ctype.h"
30#include "linux/capability.h"
31#include "linux/mm.h"
32#include "linux/vmalloc.h"
33#include "linux/blkpg.h"
34#include "linux/genhd.h"
35#include "linux/spinlock.h"
Russell Kingd052d1b2005-10-29 19:07:23 +010036#include "linux/platform_device.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include "asm/segment.h"
38#include "asm/uaccess.h"
39#include "asm/irq.h"
40#include "asm/types.h"
41#include "asm/tlbflush.h"
42#include "user_util.h"
43#include "mem_user.h"
44#include "kern_util.h"
45#include "kern.h"
46#include "mconsole_kern.h"
47#include "init.h"
48#include "irq_user.h"
49#include "irq_kern.h"
50#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include "os.h"
52#include "mem.h"
53#include "mem_kern.h"
54#include "cow.h"
55
/* Direction of a request handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * One unit of work exchanged with the I/O thread.  prepare_request() fills
 * it in and ubd_handler() reads a completed copy back from thread_fd, so
 * the field layout must stay as it is.
 */
struct io_thread_req {
	/* UBD_READ or UBD_WRITE, derived from the block request direction */
	enum ubd_req op;
	/* [0] is the backing file fd when there is one, otherwise the
	 * device fd; [1] is always the device fd. */
	int fds[2];
	/* data offsets matching fds[]: [0] is 0, [1] is cow.data_offset */
	unsigned long offsets[2];
	/* byte offset of the I/O within the device (sector << 9) */
	unsigned long long offset;
	/* byte length of the I/O */
	unsigned long length;
	/* buffer taken from the block request */
	char *buffer;
	/* always 1 << 9 in this file */
	int sectorsize;
	/* one bit per sector of this request, filled in by cowify_req() */
	unsigned long sector_mask;
	/* byte offset of bitmap_words[] in the COW file; -1 (all ones)
	 * means there is no bitmap update for this request */
	unsigned long long cow_offset;
	/* the two bitmap words starting at cow_offset */
	unsigned long bitmap_words[2];
	/* non-zero when the I/O failed; checked by ubd_handler() */
	int error;
};
71
/*
 * Helpers implemented outside this file.  Only their use here is visible:
 * open_ubd_file() and create_cow_file() return an fd or a negative errno,
 * read_cow_bitmap() returns a negative value on failure.
 */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
/*
 * Return 1 if bit number @bit is set in the byte array @data, else 0.
 * Bit 0 is the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
94
/*
 * Set bit number @bit in the byte array @data.  Bit 0 is the least
 * significant bit of data[0]; other bits are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
105/*End stuff from ubd_user.h*/
106
/* Name used for the platform driver and for each unit's platform device. */
#define DRIVER_NAME "uml-blkdev"

/* Lock handed to blk_init_queue() for the request queue. */
static DEFINE_SPINLOCK(ubd_io_lock);
/* Lock taken around device configuration (fake_major, ubd_dev entries). */
static DEFINE_SPINLOCK(ubd_lock);

/* Cleared by ubd_handler(); the code that sets it is not in this part of
 * the file. */
static void (*do_ubd)(void);

/* Block device operations, defined further down. */
static int ubd_open(struct inode *inode, struct file *filp);
static int ubd_release(struct inode *inode, struct file *file);
static int ubd_ioctl(struct inode *inode, struct file *file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units; valid indexes are 0 .. MAX_DEV - 1. */
#define MAX_DEV (16)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static struct block_device_operations ubd_blops = {
123 .owner = THIS_MODULE,
124 .open = ubd_open,
125 .release = ubd_release,
126 .ioctl = ubd_ioctl,
Christoph Hellwiga885c8c2006-01-08 01:02:50 -0800127 .getgeo = ubd_getgeo,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128};
129
130/* Protected by the queue_lock */
131static request_queue_t *ubd_queue;
132
133/* Protected by ubd_lock */
134static int fake_major = MAJOR_NR;
135
136static struct gendisk *ubd_gendisk[MAX_DEV];
137static struct gendisk *fake_gendisk[MAX_DEV];
Jeff Dike6c29256c2006-03-27 01:14:37 -0800138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139#ifdef CONFIG_BLK_DEV_UBD_SYNC
140#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
141 .cl = 1 })
142#else
143#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
144 .cl = 1 })
145#endif
146
147/* Not protected - changed only in ubd_setup_common and then only to
148 * to enable O_SYNC.
149 */
150static struct openflags global_openflags = OPEN_FLAGS;
151
/* Copy-on-write state of one device; unused when cow.file is NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory bitmap, vmalloc'd by ubd_open_dev(), freed by
	 * ubd_close() */
	unsigned long *bitmap;
	/* size of the bitmap in bytes, as reported by open_ubd_file() */
	unsigned long bitmap_len;
	/* offset of the bitmap inside the COW file */
	int bitmap_offset;
	/* data offset inside the COW file (becomes io_thread_req.offsets[1]) */
	int data_offset;
};
162
163struct ubd {
Paolo 'Blaisorblade' Giarrusso2a9d32f2006-10-30 22:07:04 -0800164 /* name (and fd, below) of the file opened for writing, either the
165 * backing or the cow file. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 char *file;
167 int count;
168 int fd;
169 __u64 size;
170 struct openflags boot_openflags;
171 struct openflags openflags;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800172 int shared;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 int no_cow;
174 struct cow cow;
175 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176};
177
178#define DEFAULT_COW { \
179 .file = NULL, \
180 .fd = -1, \
181 .bitmap = NULL, \
182 .bitmap_offset = 0, \
183 .data_offset = 0, \
184}
185
186#define DEFAULT_UBD { \
187 .file = NULL, \
188 .count = 0, \
189 .fd = -1, \
190 .size = -1, \
191 .boot_openflags = OPEN_FLAGS, \
192 .openflags = OPEN_FLAGS, \
193 .no_cow = 0, \
Jeff Dike6c29256c2006-03-27 01:14:37 -0800194 .shared = 0, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196}
197
198struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
199
200static int ubd0_init(void)
201{
202 struct ubd *dev = &ubd_dev[0];
203
204 if(dev->file == NULL)
205 dev->file = "root_fs";
206 return(0);
207}
208
209__initcall(ubd0_init);
210
211/* Only changed by fake_ide_setup which is a setup */
212static int fake_ide = 0;
213static struct proc_dir_entry *proc_ide_root = NULL;
214static struct proc_dir_entry *proc_ide = NULL;
215
216static void make_proc_ide(void)
217{
218 proc_ide_root = proc_mkdir("ide", NULL);
219 proc_ide = proc_mkdir("ide0", proc_ide_root);
220}
221
/*
 * read_proc handler for the fake "media" entry: the content is always
 * "disk\n".
 *
 * The whole string is copied to @page.  The return value is the number of
 * bytes available from offset @off, capped at @count; *eof is set when
 * fewer than @count bytes remain.  *start is set to page + off unless
 * nothing remains, in which case 0 is returned and *start is untouched.
 * @data is unused.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int left;

	strcpy(page, media);
	left = sizeof(media) - 1;
	left -= off;

	if (left >= count) {
		/* more than the caller asked for: hand back a full chunk */
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (left <= 0)
		return 0;

	*start = page + off;
	return left;
}
238
239static void make_ide_entries(char *dev_name)
240{
241 struct proc_dir_entry *dir, *ent;
242 char name[64];
243
244 if(proc_ide_root == NULL) make_proc_ide();
245
246 dir = proc_mkdir(dev_name, proc_ide);
247 if(!dir) return;
248
249 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
250 if(!ent) return;
251 ent->nlink = 1;
252 ent->data = NULL;
253 ent->read_proc = proc_ide_read_media;
254 ent->write_proc = NULL;
255 sprintf(name,"ide0/%s", dev_name);
256 proc_symlink(dev_name, proc_ide_root, name);
257}
258
259static int fake_ide_setup(char *str)
260{
261 fake_ide = 1;
262 return(1);
263}
264
265__setup("fake_ide", fake_ide_setup);
266
267__uml_help(fake_ide_setup,
268"fake_ide\n"
269" Create ide0 entries that map onto ubd devices.\n\n"
270);
271
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'z' meaning 0..25.
 * On success *ptr is advanced past the specifier and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.  The result is not
 * range-checked against MAX_DEV here.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *after;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &after, 0);
		if (after == cur)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
290
291static int ubd_setup_common(char *str, int *index_out)
292{
293 struct ubd *dev;
294 struct openflags flags = global_openflags;
295 char *backing_file;
296 int n, err, i;
297
298 if(index_out) *index_out = -1;
299 n = *str;
300 if(n == '='){
301 char *end;
302 int major;
303
304 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 if(!strcmp(str, "sync")){
306 global_openflags = of_sync(global_openflags);
307 return(0);
308 }
309 major = simple_strtoul(str, &end, 0);
310 if((*end != '\0') || (end == str)){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800311 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 "ubd_setup : didn't parse major number\n");
313 return(1);
314 }
315
316 err = 1;
317 spin_lock(&ubd_lock);
318 if(fake_major != MAJOR_NR){
319 printk(KERN_ERR "Can't assign a fake major twice\n");
320 goto out1;
321 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800322
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 fake_major = major;
324
325 printk(KERN_INFO "Setting extra ubd major number to %d\n",
326 major);
327 err = 0;
328 out1:
329 spin_unlock(&ubd_lock);
330 return(err);
331 }
332
333 n = parse_unit(&str);
334 if(n < 0){
335 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
336 "'%s'\n", str);
337 return(1);
338 }
339 if(n >= MAX_DEV){
340 printk(KERN_ERR "ubd_setup : index %d out of range "
341 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
342 return(1);
343 }
344
345 err = 1;
346 spin_lock(&ubd_lock);
347
348 dev = &ubd_dev[n];
349 if(dev->file != NULL){
350 printk(KERN_ERR "ubd_setup : device already configured\n");
351 goto out;
352 }
353
354 if (index_out)
355 *index_out = n;
356
Jeff Dike6c29256c2006-03-27 01:14:37 -0800357 for (i = 0; i < sizeof("rscd="); i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 switch (*str) {
359 case 'r':
360 flags.w = 0;
361 break;
362 case 's':
363 flags.s = 1;
364 break;
365 case 'd':
366 dev->no_cow = 1;
367 break;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800368 case 'c':
369 dev->shared = 1;
370 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 case '=':
372 str++;
373 goto break_loop;
374 default:
Jeff Dike6c29256c2006-03-27 01:14:37 -0800375 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 goto out;
377 }
378 str++;
379 }
380
381 if (*str == '=')
382 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
383 else
384 printk(KERN_ERR "ubd_setup : Expected '='\n");
385 goto out;
386
387break_loop:
388 err = 0;
389 backing_file = strchr(str, ',');
390
391 if (!backing_file) {
392 backing_file = strchr(str, ':');
393 }
394
395 if(backing_file){
396 if(dev->no_cow)
397 printk(KERN_ERR "Can't specify both 'd' and a "
398 "cow file\n");
399 else {
400 *backing_file = '\0';
401 backing_file++;
402 }
403 }
404 dev->file = str;
405 dev->cow.file = backing_file;
406 dev->boot_openflags = flags;
407out:
408 spin_unlock(&ubd_lock);
409 return(err);
410}
411
412static int ubd_setup(char *str)
413{
414 ubd_setup_common(str, NULL);
415 return(1);
416}
417
418__setup("ubd", ubd_setup);
419__uml_help(ubd_setup,
420"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
421" This is used to associate a device with a file in the underlying\n"
422" filesystem. When specifying two filenames, the first one is the\n"
423" COW name and the second is the backing file name. As separator you can\n"
424" use either a ':' or a ',': the first one allows writing things like;\n"
425" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
426" while with a ',' the shell would not expand the 2nd '~'.\n"
427" When using only one filename, UML will detect whether to thread it like\n"
428" a COW file or a backing file. To override this detection, add the 'd'\n"
429" flag:\n"
430" ubd0d=BackingFile\n"
431" Usually, there is a filesystem in the file, but \n"
432" that's not required. Swap devices containing swap files can be\n"
433" specified like this. Also, a file which doesn't contain a\n"
434" filesystem can have its contents read in the virtual \n"
435" machine by running 'dd' on the device. <n> must be in the range\n"
436" 0 to 7. Appending an 'r' to the number will cause that device\n"
437" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
438" an 's' will cause data to be written to disk on the host immediately.\n\n"
439);
440
441static int udb_setup(char *str)
442{
443 printk("udb%s specified on command line is almost certainly a ubd -> "
444 "udb TYPO\n", str);
445 return(1);
446}
447
448__setup("udb", udb_setup);
449__uml_help(udb_setup,
450"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700451" This option is here solely to catch ubd -> udb typos, which can be\n"
452" to impossible to catch visually unless you specifically look for\n"
453" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454" in the boot output.\n\n"
455);
456
457static int fakehd_set = 0;
458static int fakehd(char *str)
459{
460 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
461 fakehd_set = 1;
462 return 1;
463}
464
465__setup("fakehd", fakehd);
466__uml_help(fakehd,
467"fakehd\n"
468" Change the ubd device name to \"hd\".\n\n"
469);
470
471static void do_ubd_request(request_queue_t * q);
Jeff Dike91acb212005-10-10 23:10:32 -0400472
473/* Only changed by ubd_init, which is an initcall. */
474int thread_fd = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475
476/* Changed by ubd_handler, which is serialized because interrupts only
477 * happen on CPU 0.
478 */
479int intr_count = 0;
480
481/* call ubd_finish if you need to serialize */
Jeff Dike91acb212005-10-10 23:10:32 -0400482static void __ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483{
Jeff Dike91acb212005-10-10 23:10:32 -0400484 int nsect;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485
Jeff Dike91acb212005-10-10 23:10:32 -0400486 if(error){
487 end_request(req, 0);
488 return;
489 }
490 nsect = req->current_nr_sectors;
491 req->sector += nsect;
492 req->buffer += nsect << 9;
493 req->errors = 0;
494 req->nr_sectors -= nsect;
495 req->current_nr_sectors = 0;
496 end_request(req, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497}
498
Jeff Dike91acb212005-10-10 23:10:32 -0400499static inline void ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500{
Jeff Dike91acb212005-10-10 23:10:32 -0400501 spin_lock(&ubd_io_lock);
502 __ubd_finish(req, error);
503 spin_unlock(&ubd_io_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504}
505
Jeff Dike91acb212005-10-10 23:10:32 -0400506/* Called without ubd_io_lock held */
507static void ubd_handler(void)
508{
509 struct io_thread_req req;
510 struct request *rq = elv_next_request(ubd_queue);
511 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
Jeff Dike91acb212005-10-10 23:10:32 -0400513 do_ubd = NULL;
514 intr_count++;
515 n = os_read_file(thread_fd, &req, sizeof(req));
516 if(n != sizeof(req)){
517 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
518 "err = %d\n", os_getpid(), -n);
519 spin_lock(&ubd_io_lock);
520 end_request(rq, 0);
521 spin_unlock(&ubd_io_lock);
522 return;
523 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800524
Jeff Dike91acb212005-10-10 23:10:32 -0400525 ubd_finish(rq, req.error);
526 reactivate_fd(thread_fd, UBD_IRQ);
527 do_ubd_request(ubd_queue);
528}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529
Al Viro7bea96f2006-10-08 22:49:34 +0100530static irqreturn_t ubd_intr(int irq, void *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531{
Jeff Dike91acb212005-10-10 23:10:32 -0400532 ubd_handler();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return(IRQ_HANDLED);
534}
535
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
546
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547static int ubd_file_size(struct ubd *dev, __u64 *size_out)
548{
549 char *file;
550
551 file = dev->cow.file ? dev->cow.file : dev->file;
552 return(os_file_size(file, size_out));
553}
554
555static void ubd_close(struct ubd *dev)
556{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 os_close_file(dev->fd);
558 if(dev->cow.file == NULL)
559 return;
560
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561 os_close_file(dev->cow.fd);
562 vfree(dev->cow.bitmap);
563 dev->cow.bitmap = NULL;
564}
565
566static int ubd_open_dev(struct ubd *dev)
567{
568 struct openflags flags;
569 char **back_ptr;
570 int err, create_cow, *create_ptr;
571
572 dev->openflags = dev->boot_openflags;
573 create_cow = 0;
574 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
575 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800576 dev->fd = open_ubd_file(dev->file, &dev->openflags, dev->shared,
577 back_ptr, &dev->cow.bitmap_offset,
578 &dev->cow.bitmap_len, &dev->cow.data_offset,
579 create_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580
581 if((dev->fd == -ENOENT) && create_cow){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800582 dev->fd = create_cow_file(dev->file, dev->cow.file,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 dev->openflags, 1 << 9, PAGE_SIZE,
Jeff Dike6c29256c2006-03-27 01:14:37 -0800584 &dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585 &dev->cow.bitmap_len,
586 &dev->cow.data_offset);
587 if(dev->fd >= 0){
588 printk(KERN_INFO "Creating \"%s\" as COW file for "
589 "\"%s\"\n", dev->file, dev->cow.file);
590 }
591 }
592
593 if(dev->fd < 0){
594 printk("Failed to open '%s', errno = %d\n", dev->file,
595 -dev->fd);
596 return(dev->fd);
597 }
598
599 if(dev->cow.file != NULL){
600 err = -ENOMEM;
601 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
602 if(dev->cow.bitmap == NULL){
603 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
604 goto error;
605 }
606 flush_tlb_kernel_vm();
607
Jeff Dike6c29256c2006-03-27 01:14:37 -0800608 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
609 dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610 dev->cow.bitmap_len);
611 if(err < 0)
612 goto error;
613
614 flags = dev->openflags;
615 flags.w = 0;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800616 err = open_ubd_file(dev->cow.file, &flags, dev->shared, NULL,
617 NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 if(err < 0) goto error;
619 dev->cow.fd = err;
620 }
621 return(0);
622 error:
623 os_close_file(dev->fd);
624 return(err);
625}
626
627static int ubd_new_disk(int major, u64 size, int unit,
628 struct gendisk **disk_out)
629
630{
631 struct gendisk *disk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
633 disk = alloc_disk(1 << UBD_SHIFT);
634 if(disk == NULL)
635 return(-ENOMEM);
636
637 disk->major = major;
638 disk->first_minor = unit << UBD_SHIFT;
639 disk->fops = &ubd_blops;
640 set_capacity(disk, size / 512);
Greg Kroah-Hartmance7b0f42005-06-20 21:15:16 -0700641 if(major == MAJOR_NR)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
Greg Kroah-Hartmance7b0f42005-06-20 21:15:16 -0700643 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 sprintf(disk->disk_name, "ubd_fake%d", unit);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 /* sysfs register (not for ide fake devices) */
647 if (major == MAJOR_NR) {
648 ubd_dev[unit].pdev.id = unit;
649 ubd_dev[unit].pdev.name = DRIVER_NAME;
650 platform_device_register(&ubd_dev[unit].pdev);
651 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
652 }
653
654 disk->private_data = &ubd_dev[unit];
655 disk->queue = ubd_queue;
656 add_disk(disk);
657
658 *disk_out = disk;
659 return 0;
660}
661
/*
 * Round n up to the next multiple of 512 (one sector).  The argument is
 * parenthesized so that any expression can be passed, and the mask is
 * built with ~ rather than by left-shifting -1, which is undefined
 * behaviour in C.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
663
664static int ubd_add(int n)
665{
666 struct ubd *dev = &ubd_dev[n];
667 int err;
668
Jeff Dikeec7cf782005-09-03 15:57:29 -0700669 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 if(dev->file == NULL)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700671 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 err = ubd_file_size(dev, &dev->size);
674 if(err < 0)
Jeff Dike80c13742006-09-29 01:58:51 -0700675 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676
677 dev->size = ROUND_BLOCK(dev->size);
678
679 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800680 if(err)
Jeff Dike80c13742006-09-29 01:58:51 -0700681 goto out;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800682
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 if(fake_major != MAJOR_NR)
Jeff Dike6c29256c2006-03-27 01:14:37 -0800684 ubd_new_disk(fake_major, dev->size, n,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 &fake_gendisk[n]);
686
687 /* perhaps this should also be under the "if (fake_major)" above */
688 /* using the fake_disk->disk_name and also the fakehd_set name */
689 if (fake_ide)
690 make_ide_entries(ubd_gendisk[n]->disk_name);
691
Jeff Dikeec7cf782005-09-03 15:57:29 -0700692 err = 0;
Jeff Dikeec7cf782005-09-03 15:57:29 -0700693out:
694 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695}
696
697static int ubd_config(char *str)
698{
699 int n, err;
700
Jeff Dike970d6e32006-01-06 00:18:48 -0800701 str = kstrdup(str, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 if(str == NULL){
703 printk(KERN_ERR "ubd_config failed to strdup string\n");
704 return(1);
705 }
706 err = ubd_setup_common(str, &n);
707 if(err){
708 kfree(str);
709 return(-1);
710 }
711 if(n == -1) return(0);
712
713 spin_lock(&ubd_lock);
714 err = ubd_add(n);
715 if(err)
716 ubd_dev[n].file = NULL;
717 spin_unlock(&ubd_lock);
718
719 return(err);
720}
721
722static int ubd_get_config(char *name, char *str, int size, char **error_out)
723{
724 struct ubd *dev;
725 int n, len = 0;
726
727 n = parse_unit(&name);
728 if((n >= MAX_DEV) || (n < 0)){
729 *error_out = "ubd_get_config : device number out of range";
730 return(-1);
731 }
732
733 dev = &ubd_dev[n];
734 spin_lock(&ubd_lock);
735
736 if(dev->file == NULL){
737 CONFIG_CHUNK(str, size, len, "", 1);
738 goto out;
739 }
740
741 CONFIG_CHUNK(str, size, len, dev->file, 0);
742
743 if(dev->cow.file != NULL){
744 CONFIG_CHUNK(str, size, len, ",", 0);
745 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
746 }
747 else CONFIG_CHUNK(str, size, len, "", 1);
748
749 out:
750 spin_unlock(&ubd_lock);
751 return(len);
752}
753
Jeff Dike29d56cf2005-06-25 14:55:25 -0700754static int ubd_id(char **str, int *start_out, int *end_out)
755{
756 int n;
757
758 n = parse_unit(str);
759 *start_out = 0;
760 *end_out = MAX_DEV - 1;
761 return n;
762}
763
764static int ubd_remove(int n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765{
766 struct ubd *dev;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700767 int err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
Jeff Dike29d56cf2005-06-25 14:55:25 -0700769 spin_lock(&ubd_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
771 if(ubd_gendisk[n] == NULL)
772 goto out;
773
Jeff Dike29d56cf2005-06-25 14:55:25 -0700774 dev = &ubd_dev[n];
775
776 if(dev->file == NULL)
777 goto out;
778
779 /* you cannot remove a open disk */
780 err = -EBUSY;
781 if(dev->count > 0)
782 goto out;
783
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 del_gendisk(ubd_gendisk[n]);
785 put_disk(ubd_gendisk[n]);
786 ubd_gendisk[n] = NULL;
787
788 if(fake_gendisk[n] != NULL){
789 del_gendisk(fake_gendisk[n]);
790 put_disk(fake_gendisk[n]);
791 fake_gendisk[n] = NULL;
792 }
793
794 platform_device_unregister(&dev->pdev);
795 *dev = ((struct ubd) DEFAULT_UBD);
796 err = 0;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700797out:
798 spin_unlock(&ubd_lock);
799 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800}
801
802static struct mc_device ubd_mc = {
803 .name = "ubd",
804 .config = ubd_config,
805 .get_config = ubd_get_config,
Jeff Dike29d56cf2005-06-25 14:55:25 -0700806 .id = ubd_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 .remove = ubd_remove,
808};
809
810static int ubd_mc_init(void)
811{
812 mconsole_register_dev(&ubd_mc);
813 return 0;
814}
815
816__initcall(ubd_mc_init);
817
Russell King3ae5eae2005-11-09 22:32:44 +0000818static struct platform_driver ubd_driver = {
819 .driver = {
820 .name = DRIVER_NAME,
821 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822};
823
824int ubd_init(void)
825{
826 int i;
827
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 if (register_blkdev(MAJOR_NR, "ubd"))
829 return -1;
830
831 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
832 if (!ubd_queue) {
833 unregister_blkdev(MAJOR_NR, "ubd");
834 return -1;
835 }
836
837 if (fake_major != MAJOR_NR) {
838 char name[sizeof("ubd_nnn\0")];
839
840 snprintf(name, sizeof(name), "ubd_%d", fake_major);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 if (register_blkdev(fake_major, "ubd"))
842 return -1;
843 }
Russell King3ae5eae2005-11-09 22:32:44 +0000844 platform_driver_register(&ubd_driver);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800845 for (i = 0; i < MAX_DEV; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 ubd_add(i);
847 return 0;
848}
849
850late_initcall(ubd_init);
851
Jeff Dike91acb212005-10-10 23:10:32 -0400852int ubd_driver_init(void){
853 unsigned long stack;
854 int err;
855
856 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
857 if(global_openflags.s){
858 printk(KERN_INFO "ubd: Synchronous mode\n");
859 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
860 * enough. So use anyway the io thread. */
861 }
862 stack = alloc_stack(0, 0);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800863 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
Jeff Dike91acb212005-10-10 23:10:32 -0400864 &thread_fd);
865 if(io_pid < 0){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800866 printk(KERN_ERR
Jeff Dike91acb212005-10-10 23:10:32 -0400867 "ubd : Failed to start I/O thread (errno = %d) - "
868 "falling back to synchronous I/O\n", -io_pid);
869 io_pid = -1;
870 return(0);
871 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800872 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
Thomas Gleixnerbd6aa652006-07-01 19:29:27 -0700873 IRQF_DISABLED, "ubd", ubd_dev);
Jeff Dike91acb212005-10-10 23:10:32 -0400874 if(err != 0)
875 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
Jeff Dikef4c57a72006-03-31 02:30:10 -0800876 return 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400877}
878
879device_initcall(ubd_driver_init);
880
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881static int ubd_open(struct inode *inode, struct file *filp)
882{
883 struct gendisk *disk = inode->i_bdev->bd_disk;
884 struct ubd *dev = disk->private_data;
885 int err = 0;
886
887 if(dev->count == 0){
888 err = ubd_open_dev(dev);
889 if(err){
890 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
891 disk->disk_name, dev->file, -err);
892 goto out;
893 }
894 }
895 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700896 set_disk_ro(disk, !dev->openflags.w);
897
898 /* This should no more be needed. And it didn't work anyway to exclude
899 * read-write remounting of filesystems.*/
900 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 if(--dev->count == 0) ubd_close(dev);
902 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700903 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 out:
905 return(err);
906}
907
908static int ubd_release(struct inode * inode, struct file * file)
909{
910 struct gendisk *disk = inode->i_bdev->bd_disk;
911 struct ubd *dev = disk->private_data;
912
913 if(--dev->count == 0)
914 ubd_close(dev);
915 return(0);
916}
917
Jeff Dike91acb212005-10-10 23:10:32 -0400918static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
919 __u64 *cow_offset, unsigned long *bitmap,
920 __u64 bitmap_offset, unsigned long *bitmap_words,
921 __u64 bitmap_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922{
Jeff Dike91acb212005-10-10 23:10:32 -0400923 __u64 sector = io_offset >> 9;
924 int i, update_bitmap = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925
Jeff Dike91acb212005-10-10 23:10:32 -0400926 for(i = 0; i < length >> 9; i++){
927 if(cow_mask != NULL)
928 ubd_set_bit(i, (unsigned char *) cow_mask);
929 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
930 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931
Jeff Dike91acb212005-10-10 23:10:32 -0400932 update_bitmap = 1;
933 ubd_set_bit(sector + i, (unsigned char *) bitmap);
934 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935
Jeff Dike91acb212005-10-10 23:10:32 -0400936 if(!update_bitmap)
937 return;
938
939 *cow_offset = sector / (sizeof(unsigned long) * 8);
940
941 /* This takes care of the case where we're exactly at the end of the
942 * device, and *cow_offset + 1 is off the end. So, just back it up
943 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
944 * for the original diagnosis.
945 */
946 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
947 sizeof(unsigned long) - 1))
948 (*cow_offset)--;
949
950 bitmap_words[0] = bitmap[*cow_offset];
951 bitmap_words[1] = bitmap[*cow_offset + 1];
952
953 *cow_offset *= sizeof(unsigned long);
954 *cow_offset += bitmap_offset;
955}
956
957static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
958 __u64 bitmap_offset, __u64 bitmap_len)
959{
960 __u64 sector = req->offset >> 9;
961 int i;
962
963 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
964 panic("Operation too long");
965
966 if(req->op == UBD_READ) {
967 for(i = 0; i < req->length >> 9; i++){
968 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
Jeff Dike6c29256c2006-03-27 01:14:37 -0800969 ubd_set_bit(i, (unsigned char *)
Jeff Dike91acb212005-10-10 23:10:32 -0400970 &req->sector_mask);
971 }
972 }
973 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
974 &req->cow_offset, bitmap, bitmap_offset,
975 req->bitmap_words, bitmap_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976}
977
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978/* Called with ubd_io_lock held */
Jeff Dike91acb212005-10-10 23:10:32 -0400979static int prepare_request(struct request *req, struct io_thread_req *io_req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980{
981 struct gendisk *disk = req->rq_disk;
982 struct ubd *dev = disk->private_data;
Jeff Dike91acb212005-10-10 23:10:32 -0400983 __u64 offset;
984 int len;
985
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700986 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800988 printk("Write attempted on readonly ubd device %s\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 disk->disk_name);
Jeff Dike91acb212005-10-10 23:10:32 -0400990 end_request(req, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 return(1);
992 }
993
Jeff Dike91acb212005-10-10 23:10:32 -0400994 offset = ((__u64) req->sector) << 9;
995 len = req->current_nr_sectors << 9;
996
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
998 io_req->fds[1] = dev->fd;
Jeff Dike91acb212005-10-10 23:10:32 -0400999 io_req->cow_offset = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 io_req->offset = offset;
1001 io_req->length = len;
1002 io_req->error = 0;
Jeff Dike91acb212005-10-10 23:10:32 -04001003 io_req->sector_mask = 0;
1004
1005 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 io_req->offsets[0] = 0;
1007 io_req->offsets[1] = dev->cow.data_offset;
Jeff Dike91acb212005-10-10 23:10:32 -04001008 io_req->buffer = req->buffer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 io_req->sectorsize = 1 << 9;
1010
Jeff Dike91acb212005-10-10 23:10:32 -04001011 if(dev->cow.file != NULL)
1012 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1013 dev->cow.bitmap_len);
1014
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 return(0);
1016}
1017
1018/* Called with ubd_io_lock held */
1019static void do_ubd_request(request_queue_t *q)
1020{
1021 struct io_thread_req io_req;
1022 struct request *req;
Jeff Dike91acb212005-10-10 23:10:32 -04001023 int err, n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
Jeff Dike91acb212005-10-10 23:10:32 -04001025 if(thread_fd == -1){
1026 while((req = elv_next_request(q)) != NULL){
1027 err = prepare_request(req, &io_req);
1028 if(!err){
1029 do_io(&io_req);
1030 __ubd_finish(req, io_req.error);
1031 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 }
1033 }
Jeff Dike91acb212005-10-10 23:10:32 -04001034 else {
1035 if(do_ubd || (req = elv_next_request(q)) == NULL)
1036 return;
1037 err = prepare_request(req, &io_req);
1038 if(!err){
1039 do_ubd = ubd_handler;
1040 n = os_write_file(thread_fd, (char *) &io_req,
1041 sizeof(io_req));
1042 if(n != sizeof(io_req))
1043 printk("write to io thread failed, "
1044 "errno = %d\n", -n);
1045 }
1046 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047}
1048
Christoph Hellwiga885c8c2006-01-08 01:02:50 -08001049static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1050{
1051 struct ubd *dev = bdev->bd_disk->private_data;
1052
1053 geo->heads = 128;
1054 geo->sectors = 32;
1055 geo->cylinders = dev->size / (128 * 32 * 512);
1056 return 0;
1057}
1058
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059static int ubd_ioctl(struct inode * inode, struct file * file,
1060 unsigned int cmd, unsigned long arg)
1061{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1063 struct hd_driveid ubd_id = {
1064 .cyls = 0,
1065 .heads = 128,
1066 .sectors = 32,
1067 };
1068
1069 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 struct cdrom_volctrl volume;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 case HDIO_GET_IDENTITY:
1072 ubd_id.cyls = dev->size / (128 * 32 * 512);
1073 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1074 sizeof(ubd_id)))
1075 return(-EFAULT);
1076 return(0);
1077
1078 case CDROMVOLREAD:
1079 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1080 return(-EFAULT);
1081 volume.channel0 = 255;
1082 volume.channel1 = 255;
1083 volume.channel2 = 255;
1084 volume.channel3 = 255;
1085 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1086 return(-EFAULT);
1087 return(0);
1088 }
1089 return(-EINVAL);
1090}
1091
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001092static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
1094 struct uml_stat buf1, buf2;
1095 int err;
1096
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001097 if(from_cmdline == NULL)
1098 return 0;
1099 if(!strcmp(from_cmdline, from_cow))
1100 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
1102 err = os_stat_file(from_cmdline, &buf1);
1103 if(err < 0){
1104 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001105 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 }
1107 err = os_stat_file(from_cow, &buf2);
1108 if(err < 0){
1109 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001110 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 }
1112 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001113 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114
1115 printk("Backing file mismatch - \"%s\" requested,\n"
1116 "\"%s\" specified in COW header of \"%s\"\n",
1117 from_cmdline, from_cow, cow);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001118 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119}
1120
1121static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1122{
1123 unsigned long modtime;
Paolo 'Blaisorblade' Giarrussofe1db502006-02-24 13:03:58 -08001124 unsigned long long actual;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 int err;
1126
1127 err = os_file_modtime(file, &modtime);
1128 if(err < 0){
1129 printk("Failed to get modification time of backing file "
1130 "\"%s\", err = %d\n", file, -err);
1131 return(err);
1132 }
1133
1134 err = os_file_size(file, &actual);
1135 if(err < 0){
1136 printk("Failed to get size of backing file \"%s\", "
1137 "err = %d\n", file, -err);
1138 return(err);
1139 }
1140
1141 if(actual != size){
1142 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1143 * the typecast.*/
1144 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1145 "file\n", (unsigned long long) size, actual);
1146 return(-EINVAL);
1147 }
1148 if(modtime != mtime){
1149 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1150 "file\n", mtime, modtime);
1151 return(-EINVAL);
1152 }
1153 return(0);
1154}
1155
/*
 * Seek @fd to @offset and read @len bytes into @buf.
 *
 * Returns the negative error from os_seek_file() or os_read_file() on
 * failure and 0 otherwise; the number of bytes actually read is not
 * reported to the caller.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1170
Jeff Dike6c29256c2006-03-27 01:14:37 -08001171int open_ubd_file(char *file, struct openflags *openflags, int shared,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 char **backing_file_out, int *bitmap_offset_out,
1173 unsigned long *bitmap_len_out, int *data_offset_out,
1174 int *create_cow_out)
1175{
1176 time_t mtime;
1177 unsigned long long size;
1178 __u32 version, align;
1179 char *backing_file;
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001180 int fd, err, sectorsize, asked_switch, mode = 0644;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181
1182 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001183 if (fd < 0) {
1184 if ((fd == -ENOENT) && (create_cow_out != NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 *create_cow_out = 1;
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001186 if (!openflags->w ||
1187 ((fd != -EROFS) && (fd != -EACCES)))
1188 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 openflags->w = 0;
1190 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001191 if (fd < 0)
1192 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 }
1194
Jeff Dike6c29256c2006-03-27 01:14:37 -08001195 if(shared)
1196 printk("Not locking \"%s\" on the host\n", file);
1197 else {
1198 err = os_lock_file(fd, openflags->w);
1199 if(err < 0){
1200 printk("Failed to lock '%s', err = %d\n", file, -err);
1201 goto out_close;
1202 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 }
1204
Andreas Mohrd6e05ed2006-06-26 18:35:02 +02001205 /* Successful return case! */
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001206 if(backing_file_out == NULL)
1207 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
1209 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1210 &size, &sectorsize, &align, bitmap_offset_out);
1211 if(err && (*backing_file_out != NULL)){
1212 printk("Failed to read COW header from COW file \"%s\", "
1213 "errno = %d\n", file, -err);
1214 goto out_close;
1215 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001216 if(err)
1217 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001219 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001221 /* Allow switching only if no mismatch. */
1222 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 printk("Switching backing file to '%s'\n", *backing_file_out);
1224 err = write_cow_header(file, fd, *backing_file_out,
1225 sectorsize, align, &size);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001226 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 printk("Switch failed, errno = %d\n", -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001228 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001230 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 *backing_file_out = backing_file;
1232 err = backing_file_mismatch(*backing_file_out, size, mtime);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001233 if (err)
1234 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 }
1236
1237 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1238 bitmap_len_out, data_offset_out);
1239
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001240 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 out_close:
1242 os_close_file(fd);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001243 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244}
1245
1246int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1247 int sectorsize, int alignment, int *bitmap_offset_out,
1248 unsigned long *bitmap_len_out, int *data_offset_out)
1249{
1250 int err, fd;
1251
1252 flags.c = 1;
Jeff Dike6c29256c2006-03-27 01:14:37 -08001253 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 if(fd < 0){
1255 err = fd;
1256 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1257 -err);
1258 goto out;
1259 }
1260
1261 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1262 bitmap_offset_out, bitmap_len_out,
1263 data_offset_out);
1264 if(!err)
1265 return(fd);
1266 os_close_file(fd);
1267 out:
1268 return(err);
1269}
1270
Jeff Dike91acb212005-10-10 23:10:32 -04001271static int update_bitmap(struct io_thread_req *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272{
Jeff Dike91acb212005-10-10 23:10:32 -04001273 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
Jeff Dike91acb212005-10-10 23:10:32 -04001275 if(req->cow_offset == -1)
1276 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277
Jeff Dike91acb212005-10-10 23:10:32 -04001278 n = os_seek_file(req->fds[1], req->cow_offset);
1279 if(n < 0){
1280 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1281 return(1);
1282 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
Jeff Dike91acb212005-10-10 23:10:32 -04001284 n = os_write_file(req->fds[1], &req->bitmap_words,
1285 sizeof(req->bitmap_words));
1286 if(n != sizeof(req->bitmap_words)){
1287 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1288 req->fds[1]);
1289 return(1);
1290 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291
Jeff Dike91acb212005-10-10 23:10:32 -04001292 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293}
Jeff Dike91acb212005-10-10 23:10:32 -04001294
1295void do_io(struct io_thread_req *req)
1296{
1297 char *buf;
1298 unsigned long len;
1299 int n, nsectors, start, end, bit;
1300 int err;
1301 __u64 off;
1302
1303 nsectors = req->length / req->sectorsize;
1304 start = 0;
1305 do {
1306 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1307 end = start;
1308 while((end < nsectors) &&
1309 (ubd_test_bit(end, (unsigned char *)
1310 &req->sector_mask) == bit))
1311 end++;
1312
1313 off = req->offset + req->offsets[bit] +
1314 start * req->sectorsize;
1315 len = (end - start) * req->sectorsize;
1316 buf = &req->buffer[start * req->sectorsize];
1317
1318 err = os_seek_file(req->fds[bit], off);
1319 if(err < 0){
1320 printk("do_io - lseek failed : err = %d\n", -err);
1321 req->error = 1;
1322 return;
1323 }
1324 if(req->op == UBD_READ){
1325 n = 0;
1326 do {
1327 buf = &buf[n];
1328 len -= n;
1329 n = os_read_file(req->fds[bit], buf, len);
1330 if (n < 0) {
1331 printk("do_io - read failed, err = %d "
1332 "fd = %d\n", -n, req->fds[bit]);
1333 req->error = 1;
1334 return;
1335 }
1336 } while((n < len) && (n != 0));
1337 if (n < len) memset(&buf[n], 0, len - n);
1338 } else {
1339 n = os_write_file(req->fds[bit], buf, len);
1340 if(n != len){
1341 printk("do_io - write failed err = %d "
1342 "fd = %d\n", -n, req->fds[bit]);
1343 req->error = 1;
1344 return;
1345 }
1346 }
1347
1348 start = end;
1349 } while(start < nsectors);
1350
1351 req->error = update_bitmap(req);
1352}
1353
/*
 * Descriptor io_thread() reads requests from and writes results back to.
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Incremented by io_thread() for every complete request it reads.
 * Only changed by the io thread.
 */
int io_count = 0;
1361
1362int io_thread(void *arg)
1363{
1364 struct io_thread_req req;
1365 int n;
1366
1367 ignore_sigwinch_sig();
1368 while(1){
1369 n = os_read_file(kernel_fd, &req, sizeof(req));
1370 if(n != sizeof(req)){
1371 if(n < 0)
1372 printk("io_thread - read failed, fd = %d, "
1373 "err = %d\n", kernel_fd, -n);
1374 else {
1375 printk("io_thread - short read, fd = %d, "
1376 "length = %d\n", kernel_fd, n);
1377 }
1378 continue;
1379 }
1380 io_count++;
1381 do_io(&req);
1382 n = os_write_file(kernel_fd, &req, sizeof(req));
1383 if(n != sizeof(req))
1384 printk("io_thread - write failed, fd = %d, err = %d\n",
1385 kernel_fd, -n);
1386 }
Jeff Dike91acb212005-10-10 23:10:32 -04001387
Jeff Dike1b57e9c2006-01-06 00:18:49 -08001388 return 0;
1389}