blob: 290cec6d69e23e8981b570d973a5800ddf2993fd [file] [log] [blame]
Jeff Dike6c29256c2006-03-27 01:14:37 -08001/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
Russell Kingd052d1b2005-10-29 19:07:23 +010038#include "linux/platform_device.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
44#include "user_util.h"
45#include "mem_user.h"
46#include "kern_util.h"
47#include "kern.h"
48#include "mconsole_kern.h"
49#include "init.h"
50#include "irq_user.h"
51#include "irq_kern.h"
52#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Direction of a request handed to the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * A single I/O request.  prepare_request() fills one in from a block-layer
 * request; ubd_handler() reads one back from thread_fd and looks at 'error'.
 */
struct io_thread_req {
	/* UBD_READ or UBD_WRITE */
	enum ubd_req op;
	/* [0]: backing file when the device is COWed, else the device file;
	 * [1]: the device file
	 */
	int fds[2];
	/* [0]: always 0; [1]: the COW data offset */
	unsigned long offsets[2];
	/* byte offset of the transfer (sector << 9) */
	unsigned long long offset;
	/* byte length of the transfer */
	unsigned long length;
	/* data buffer taken from the block-layer request */
	char *buffer;
	/* set to 1 << 9 by prepare_request() */
	int sectorsize;
	/* one bit per sector of this request, filled in by cowify_req() */
	unsigned long sector_mask;
	/* -1 unless cowify_bitmap() had to update the COW bitmap */
	unsigned long long cow_offset;
	/* the two bitmap words starting at cow_offset */
	unsigned long bitmap_words[2];
	/* zeroed by prepare_request(); nonzero makes __ubd_finish() fail the
	 * request
	 */
	int error;
};
73
/* Helpers implemented outside this file. */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);
extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);
extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
extern void do_io(struct io_thread_req *req);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', else 0.
 * Bit 0 is the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	__u64 n;
	int bits, off;

	bits = sizeof(data[0]) * 8;
	n = bit / bits;
	off = bit % bits;
	return (data[n] & (1 << off)) != 0;
}
96
/*
 * Set bit number 'bit' in the byte array 'data'.
 * Bit 0 is the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	__u64 n;
	int bits, off;

	bits = sizeof(data[0]) * 8;
	n = bit / bits;
	off = bit % bits;
	data[n] |= (1 << off);
}
107/*End stuff from ubd_user.h*/
108
/* Name used for both the platform driver and the per-unit platform devices */
#define DRIVER_NAME "uml-blkdev"

/* Handed to blk_init_queue() as the request queue's lock */
static DEFINE_SPINLOCK(ubd_io_lock);
/* Taken around changes to fake_major and to the ubd_dev table */
static DEFINE_SPINLOCK(ubd_lock);

/* Cleared by ubd_handler() on every completion interrupt */
static void (*do_ubd)(void);

static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units; valid unit numbers are 0 .. MAX_DEV - 1 */
#define MAX_DEV (8)
123
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static struct block_device_operations ubd_blops = {
125 .owner = THIS_MODULE,
126 .open = ubd_open,
127 .release = ubd_release,
128 .ioctl = ubd_ioctl,
Christoph Hellwiga885c8c2006-01-08 01:02:50 -0800129 .getgeo = ubd_getgeo,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130};
131
132/* Protected by the queue_lock */
133static request_queue_t *ubd_queue;
134
135/* Protected by ubd_lock */
136static int fake_major = MAJOR_NR;
137
138static struct gendisk *ubd_gendisk[MAX_DEV];
139static struct gendisk *fake_gendisk[MAX_DEV];
Jeff Dike6c29256c2006-03-27 01:14:37 -0800140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141#ifdef CONFIG_BLK_DEV_UBD_SYNC
142#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
143 .cl = 1 })
144#else
145#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
146 .cl = 1 })
147#endif
148
149/* Not protected - changed only in ubd_setup_common and then only to
150 * to enable O_SYNC.
151 */
152static struct openflags global_openflags = OPEN_FLAGS;
153
/* Copy-on-write state of one ubd device */
struct cow {
	/* This is the backing file, actually */
	char *file;
	/* descriptor of the backing file, opened read-only by ubd_open_dev() */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* size handed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* where the bitmap is read from, as reported by open_ubd_file() */
	int bitmap_offset;
	/* used as io_thread_req.offsets[1] by prepare_request() */
	int data_offset;
};
163
164struct ubd {
165 char *file;
166 int count;
167 int fd;
168 __u64 size;
169 struct openflags boot_openflags;
170 struct openflags openflags;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800171 int shared;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 int no_cow;
173 struct cow cow;
174 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175};
176
177#define DEFAULT_COW { \
178 .file = NULL, \
179 .fd = -1, \
180 .bitmap = NULL, \
181 .bitmap_offset = 0, \
182 .data_offset = 0, \
183}
184
185#define DEFAULT_UBD { \
186 .file = NULL, \
187 .count = 0, \
188 .fd = -1, \
189 .size = -1, \
190 .boot_openflags = OPEN_FLAGS, \
191 .openflags = OPEN_FLAGS, \
192 .no_cow = 0, \
Jeff Dike6c29256c2006-03-27 01:14:37 -0800193 .shared = 0, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195}
196
197struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
198
199static int ubd0_init(void)
200{
201 struct ubd *dev = &ubd_dev[0];
202
203 if(dev->file == NULL)
204 dev->file = "root_fs";
205 return(0);
206}
207
208__initcall(ubd0_init);
209
210/* Only changed by fake_ide_setup which is a setup */
211static int fake_ide = 0;
212static struct proc_dir_entry *proc_ide_root = NULL;
213static struct proc_dir_entry *proc_ide = NULL;
214
215static void make_proc_ide(void)
216{
217 proc_ide_root = proc_mkdir("ide", NULL);
218 proc_ide = proc_mkdir("ide0", proc_ide_root);
219}
220
/*
 * read_proc handler for the fake IDE "media" entry.
 *
 * Copies "disk\n" into 'page' and reports the part of it that starts at
 * 'off', limited to 'count' bytes.  *eof is set once the remaining text
 * is shorter than 'count'; *start is only written when data is returned
 * or when 'count' was the limit.
 *
 * Returns the number of bytes available at *start, or 0 at or past the end.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int len;

	strcpy(page, "disk\n");
	len = strlen("disk\n");
	len -= off;
	if (len < count) {
		*eof = 1;
		if (len <= 0)
			return 0;
	} else {
		len = count;
	}
	*start = page + off;
	return len;
}
237
238static void make_ide_entries(char *dev_name)
239{
240 struct proc_dir_entry *dir, *ent;
241 char name[64];
242
243 if(proc_ide_root == NULL) make_proc_ide();
244
245 dir = proc_mkdir(dev_name, proc_ide);
246 if(!dir) return;
247
248 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
249 if(!ent) return;
250 ent->nlink = 1;
251 ent->data = NULL;
252 ent->read_proc = proc_ide_read_media;
253 ent->write_proc = NULL;
254 sprintf(name,"ide0/%s", dev_name);
255 proc_symlink(dev_name, proc_ide_root, name);
256}
257
258static int fake_ide_setup(char *str)
259{
260 fake_ide = 1;
261 return(1);
262}
263
264__setup("fake_ide", fake_ide_setup);
265
266__uml_help(fake_ide_setup,
267"fake_ide\n"
268" Create ide0 entries that map onto ubd devices.\n\n"
269);
270
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul() with base 0) or a single letter 'a'..'h' meaning 0..7.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned.  Returns -1, leaving *ptr untouched, when *ptr starts with
 * neither a digit nor one of those letters.  The result is not range
 * checked against MAX_DEV; callers do that.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	int n = -1;

	if (isdigit(*str)) {
		n = simple_strtoul(str, &end, 0);
		if (end == str)
			return -1;
		*ptr = end;
	} else if (('a' <= *str) && (*str <= 'h')) {
		n = *str - 'a';
		str++;
		*ptr = str;
	}
	return n;
}
289
290static int ubd_setup_common(char *str, int *index_out)
291{
292 struct ubd *dev;
293 struct openflags flags = global_openflags;
294 char *backing_file;
295 int n, err, i;
296
297 if(index_out) *index_out = -1;
298 n = *str;
299 if(n == '='){
300 char *end;
301 int major;
302
303 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 if(!strcmp(str, "sync")){
305 global_openflags = of_sync(global_openflags);
306 return(0);
307 }
308 major = simple_strtoul(str, &end, 0);
309 if((*end != '\0') || (end == str)){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800310 printk(KERN_ERR
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 "ubd_setup : didn't parse major number\n");
312 return(1);
313 }
314
315 err = 1;
316 spin_lock(&ubd_lock);
317 if(fake_major != MAJOR_NR){
318 printk(KERN_ERR "Can't assign a fake major twice\n");
319 goto out1;
320 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 fake_major = major;
323
324 printk(KERN_INFO "Setting extra ubd major number to %d\n",
325 major);
326 err = 0;
327 out1:
328 spin_unlock(&ubd_lock);
329 return(err);
330 }
331
332 n = parse_unit(&str);
333 if(n < 0){
334 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
335 "'%s'\n", str);
336 return(1);
337 }
338 if(n >= MAX_DEV){
339 printk(KERN_ERR "ubd_setup : index %d out of range "
340 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
341 return(1);
342 }
343
344 err = 1;
345 spin_lock(&ubd_lock);
346
347 dev = &ubd_dev[n];
348 if(dev->file != NULL){
349 printk(KERN_ERR "ubd_setup : device already configured\n");
350 goto out;
351 }
352
353 if (index_out)
354 *index_out = n;
355
Jeff Dike6c29256c2006-03-27 01:14:37 -0800356 for (i = 0; i < sizeof("rscd="); i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 switch (*str) {
358 case 'r':
359 flags.w = 0;
360 break;
361 case 's':
362 flags.s = 1;
363 break;
364 case 'd':
365 dev->no_cow = 1;
366 break;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800367 case 'c':
368 dev->shared = 1;
369 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 case '=':
371 str++;
372 goto break_loop;
373 default:
Jeff Dike6c29256c2006-03-27 01:14:37 -0800374 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r, s, c, or d)\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 goto out;
376 }
377 str++;
378 }
379
380 if (*str == '=')
381 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
382 else
383 printk(KERN_ERR "ubd_setup : Expected '='\n");
384 goto out;
385
386break_loop:
387 err = 0;
388 backing_file = strchr(str, ',');
389
390 if (!backing_file) {
391 backing_file = strchr(str, ':');
392 }
393
394 if(backing_file){
395 if(dev->no_cow)
396 printk(KERN_ERR "Can't specify both 'd' and a "
397 "cow file\n");
398 else {
399 *backing_file = '\0';
400 backing_file++;
401 }
402 }
403 dev->file = str;
404 dev->cow.file = backing_file;
405 dev->boot_openflags = flags;
406out:
407 spin_unlock(&ubd_lock);
408 return(err);
409}
410
411static int ubd_setup(char *str)
412{
413 ubd_setup_common(str, NULL);
414 return(1);
415}
416
417__setup("ubd", ubd_setup);
418__uml_help(ubd_setup,
419"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
420" This is used to associate a device with a file in the underlying\n"
421" filesystem. When specifying two filenames, the first one is the\n"
422" COW name and the second is the backing file name. As separator you can\n"
423" use either a ':' or a ',': the first one allows writing things like;\n"
424" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
425" while with a ',' the shell would not expand the 2nd '~'.\n"
426" When using only one filename, UML will detect whether to thread it like\n"
427" a COW file or a backing file. To override this detection, add the 'd'\n"
428" flag:\n"
429" ubd0d=BackingFile\n"
430" Usually, there is a filesystem in the file, but \n"
431" that's not required. Swap devices containing swap files can be\n"
432" specified like this. Also, a file which doesn't contain a\n"
433" filesystem can have its contents read in the virtual \n"
434" machine by running 'dd' on the device. <n> must be in the range\n"
435" 0 to 7. Appending an 'r' to the number will cause that device\n"
436" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
437" an 's' will cause data to be written to disk on the host immediately.\n\n"
438);
439
440static int udb_setup(char *str)
441{
442 printk("udb%s specified on command line is almost certainly a ubd -> "
443 "udb TYPO\n", str);
444 return(1);
445}
446
447__setup("udb", udb_setup);
448__uml_help(udb_setup,
449"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700450" This option is here solely to catch ubd -> udb typos, which can be\n"
451" to impossible to catch visually unless you specifically look for\n"
452" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453" in the boot output.\n\n"
454);
455
456static int fakehd_set = 0;
457static int fakehd(char *str)
458{
459 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
460 fakehd_set = 1;
461 return 1;
462}
463
464__setup("fakehd", fakehd);
465__uml_help(fakehd,
466"fakehd\n"
467" Change the ubd device name to \"hd\".\n\n"
468);
469
470static void do_ubd_request(request_queue_t * q);
Jeff Dike91acb212005-10-10 23:10:32 -0400471
472/* Only changed by ubd_init, which is an initcall. */
473int thread_fd = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474
475/* Changed by ubd_handler, which is serialized because interrupts only
476 * happen on CPU 0.
477 */
478int intr_count = 0;
479
480/* call ubd_finish if you need to serialize */
Jeff Dike91acb212005-10-10 23:10:32 -0400481static void __ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482{
Jeff Dike91acb212005-10-10 23:10:32 -0400483 int nsect;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484
Jeff Dike91acb212005-10-10 23:10:32 -0400485 if(error){
486 end_request(req, 0);
487 return;
488 }
489 nsect = req->current_nr_sectors;
490 req->sector += nsect;
491 req->buffer += nsect << 9;
492 req->errors = 0;
493 req->nr_sectors -= nsect;
494 req->current_nr_sectors = 0;
495 end_request(req, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496}
497
Jeff Dike91acb212005-10-10 23:10:32 -0400498static inline void ubd_finish(struct request *req, int error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499{
Jeff Dike91acb212005-10-10 23:10:32 -0400500 spin_lock(&ubd_io_lock);
501 __ubd_finish(req, error);
502 spin_unlock(&ubd_io_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503}
504
Jeff Dike91acb212005-10-10 23:10:32 -0400505/* Called without ubd_io_lock held */
506static void ubd_handler(void)
507{
508 struct io_thread_req req;
509 struct request *rq = elv_next_request(ubd_queue);
510 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
Jeff Dike91acb212005-10-10 23:10:32 -0400512 do_ubd = NULL;
513 intr_count++;
514 n = os_read_file(thread_fd, &req, sizeof(req));
515 if(n != sizeof(req)){
516 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
517 "err = %d\n", os_getpid(), -n);
518 spin_lock(&ubd_io_lock);
519 end_request(rq, 0);
520 spin_unlock(&ubd_io_lock);
521 return;
522 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800523
Jeff Dike91acb212005-10-10 23:10:32 -0400524 ubd_finish(rq, req.error);
525 reactivate_fd(thread_fd, UBD_IRQ);
526 do_ubd_request(ubd_queue);
527}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
529static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
530{
Jeff Dike91acb212005-10-10 23:10:32 -0400531 ubd_handler();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 return(IRQ_HANDLED);
533}
534
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started */
void kill_io_thread(void)
{
	if (io_pid != -1)
		os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
545
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546static int ubd_file_size(struct ubd *dev, __u64 *size_out)
547{
548 char *file;
549
550 file = dev->cow.file ? dev->cow.file : dev->file;
551 return(os_file_size(file, size_out));
552}
553
554static void ubd_close(struct ubd *dev)
555{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 os_close_file(dev->fd);
557 if(dev->cow.file == NULL)
558 return;
559
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 os_close_file(dev->cow.fd);
561 vfree(dev->cow.bitmap);
562 dev->cow.bitmap = NULL;
563}
564
565static int ubd_open_dev(struct ubd *dev)
566{
567 struct openflags flags;
568 char **back_ptr;
569 int err, create_cow, *create_ptr;
570
571 dev->openflags = dev->boot_openflags;
572 create_cow = 0;
573 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
574 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800575 dev->fd = open_ubd_file(dev->file, &dev->openflags, dev->shared,
576 back_ptr, &dev->cow.bitmap_offset,
577 &dev->cow.bitmap_len, &dev->cow.data_offset,
578 create_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579
580 if((dev->fd == -ENOENT) && create_cow){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800581 dev->fd = create_cow_file(dev->file, dev->cow.file,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 dev->openflags, 1 << 9, PAGE_SIZE,
Jeff Dike6c29256c2006-03-27 01:14:37 -0800583 &dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 &dev->cow.bitmap_len,
585 &dev->cow.data_offset);
586 if(dev->fd >= 0){
587 printk(KERN_INFO "Creating \"%s\" as COW file for "
588 "\"%s\"\n", dev->file, dev->cow.file);
589 }
590 }
591
592 if(dev->fd < 0){
593 printk("Failed to open '%s', errno = %d\n", dev->file,
594 -dev->fd);
595 return(dev->fd);
596 }
597
598 if(dev->cow.file != NULL){
599 err = -ENOMEM;
600 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
601 if(dev->cow.bitmap == NULL){
602 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
603 goto error;
604 }
605 flush_tlb_kernel_vm();
606
Jeff Dike6c29256c2006-03-27 01:14:37 -0800607 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
608 dev->cow.bitmap_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 dev->cow.bitmap_len);
610 if(err < 0)
611 goto error;
612
613 flags = dev->openflags;
614 flags.w = 0;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800615 err = open_ubd_file(dev->cow.file, &flags, dev->shared, NULL,
616 NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 if(err < 0) goto error;
618 dev->cow.fd = err;
619 }
620 return(0);
621 error:
622 os_close_file(dev->fd);
623 return(err);
624}
625
626static int ubd_new_disk(int major, u64 size, int unit,
627 struct gendisk **disk_out)
628
629{
630 struct gendisk *disk;
631 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
632 int err;
633
634 disk = alloc_disk(1 << UBD_SHIFT);
635 if(disk == NULL)
636 return(-ENOMEM);
637
638 disk->major = major;
639 disk->first_minor = unit << UBD_SHIFT;
640 disk->fops = &ubd_blops;
641 set_capacity(disk, size / 512);
642 if(major == MAJOR_NR){
643 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
644 sprintf(disk->devfs_name, "ubd/disc%d", unit);
645 sprintf(from, "ubd/%d", unit);
646 sprintf(to, "disc%d/disc", unit);
647 err = devfs_mk_symlink(from, to);
648 if(err)
649 printk("ubd_new_disk failed to make link from %s to "
650 "%s, error = %d\n", from, to, err);
651 }
652 else {
653 sprintf(disk->disk_name, "ubd_fake%d", unit);
654 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
655 }
656
657 /* sysfs register (not for ide fake devices) */
658 if (major == MAJOR_NR) {
659 ubd_dev[unit].pdev.id = unit;
660 ubd_dev[unit].pdev.name = DRIVER_NAME;
661 platform_device_register(&ubd_dev[unit].pdev);
662 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
663 }
664
665 disk->private_data = &ubd_dev[unit];
666 disk->queue = ubd_queue;
667 add_disk(disk);
668
669 *disk_out = disk;
670 return 0;
671}
672
/*
 * Round n up to the next multiple of 512.  The argument is parenthesised
 * and the mask is built from an unsigned constant, so no negative value
 * is left-shifted.  The result has type unsigned long long.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
674
675static int ubd_add(int n)
676{
677 struct ubd *dev = &ubd_dev[n];
678 int err;
679
Jeff Dikeec7cf782005-09-03 15:57:29 -0700680 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 if(dev->file == NULL)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700682 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
684 if (ubd_open_dev(dev))
Jeff Dikeec7cf782005-09-03 15:57:29 -0700685 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
687 err = ubd_file_size(dev, &dev->size);
688 if(err < 0)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700689 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
691 dev->size = ROUND_BLOCK(dev->size);
692
693 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800694 if(err)
Jeff Dikeec7cf782005-09-03 15:57:29 -0700695 goto out_close;
Jeff Dike6c29256c2006-03-27 01:14:37 -0800696
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if(fake_major != MAJOR_NR)
Jeff Dike6c29256c2006-03-27 01:14:37 -0800698 ubd_new_disk(fake_major, dev->size, n,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 &fake_gendisk[n]);
700
701 /* perhaps this should also be under the "if (fake_major)" above */
702 /* using the fake_disk->disk_name and also the fakehd_set name */
703 if (fake_ide)
704 make_ide_entries(ubd_gendisk[n]->disk_name);
705
Jeff Dikeec7cf782005-09-03 15:57:29 -0700706 err = 0;
707out_close:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 ubd_close(dev);
Jeff Dikeec7cf782005-09-03 15:57:29 -0700709out:
710 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711}
712
713static int ubd_config(char *str)
714{
715 int n, err;
716
Jeff Dike970d6e32006-01-06 00:18:48 -0800717 str = kstrdup(str, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 if(str == NULL){
719 printk(KERN_ERR "ubd_config failed to strdup string\n");
720 return(1);
721 }
722 err = ubd_setup_common(str, &n);
723 if(err){
724 kfree(str);
725 return(-1);
726 }
727 if(n == -1) return(0);
728
729 spin_lock(&ubd_lock);
730 err = ubd_add(n);
731 if(err)
732 ubd_dev[n].file = NULL;
733 spin_unlock(&ubd_lock);
734
735 return(err);
736}
737
738static int ubd_get_config(char *name, char *str, int size, char **error_out)
739{
740 struct ubd *dev;
741 int n, len = 0;
742
743 n = parse_unit(&name);
744 if((n >= MAX_DEV) || (n < 0)){
745 *error_out = "ubd_get_config : device number out of range";
746 return(-1);
747 }
748
749 dev = &ubd_dev[n];
750 spin_lock(&ubd_lock);
751
752 if(dev->file == NULL){
753 CONFIG_CHUNK(str, size, len, "", 1);
754 goto out;
755 }
756
757 CONFIG_CHUNK(str, size, len, dev->file, 0);
758
759 if(dev->cow.file != NULL){
760 CONFIG_CHUNK(str, size, len, ",", 0);
761 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
762 }
763 else CONFIG_CHUNK(str, size, len, "", 1);
764
765 out:
766 spin_unlock(&ubd_lock);
767 return(len);
768}
769
Jeff Dike29d56cf2005-06-25 14:55:25 -0700770static int ubd_id(char **str, int *start_out, int *end_out)
771{
772 int n;
773
774 n = parse_unit(str);
775 *start_out = 0;
776 *end_out = MAX_DEV - 1;
777 return n;
778}
779
780static int ubd_remove(int n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781{
782 struct ubd *dev;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700783 int err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784
Jeff Dike29d56cf2005-06-25 14:55:25 -0700785 spin_lock(&ubd_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786
787 if(ubd_gendisk[n] == NULL)
788 goto out;
789
Jeff Dike29d56cf2005-06-25 14:55:25 -0700790 dev = &ubd_dev[n];
791
792 if(dev->file == NULL)
793 goto out;
794
795 /* you cannot remove a open disk */
796 err = -EBUSY;
797 if(dev->count > 0)
798 goto out;
799
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 del_gendisk(ubd_gendisk[n]);
801 put_disk(ubd_gendisk[n]);
802 ubd_gendisk[n] = NULL;
803
804 if(fake_gendisk[n] != NULL){
805 del_gendisk(fake_gendisk[n]);
806 put_disk(fake_gendisk[n]);
807 fake_gendisk[n] = NULL;
808 }
809
810 platform_device_unregister(&dev->pdev);
811 *dev = ((struct ubd) DEFAULT_UBD);
812 err = 0;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700813out:
814 spin_unlock(&ubd_lock);
815 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816}
817
818static struct mc_device ubd_mc = {
819 .name = "ubd",
820 .config = ubd_config,
821 .get_config = ubd_get_config,
Jeff Dike29d56cf2005-06-25 14:55:25 -0700822 .id = ubd_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 .remove = ubd_remove,
824};
825
826static int ubd_mc_init(void)
827{
828 mconsole_register_dev(&ubd_mc);
829 return 0;
830}
831
832__initcall(ubd_mc_init);
833
Russell King3ae5eae2005-11-09 22:32:44 +0000834static struct platform_driver ubd_driver = {
835 .driver = {
836 .name = DRIVER_NAME,
837 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838};
839
840int ubd_init(void)
841{
842 int i;
843
844 devfs_mk_dir("ubd");
845 if (register_blkdev(MAJOR_NR, "ubd"))
846 return -1;
847
848 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
849 if (!ubd_queue) {
850 unregister_blkdev(MAJOR_NR, "ubd");
851 return -1;
852 }
853
854 if (fake_major != MAJOR_NR) {
855 char name[sizeof("ubd_nnn\0")];
856
857 snprintf(name, sizeof(name), "ubd_%d", fake_major);
858 devfs_mk_dir(name);
859 if (register_blkdev(fake_major, "ubd"))
860 return -1;
861 }
Russell King3ae5eae2005-11-09 22:32:44 +0000862 platform_driver_register(&ubd_driver);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800863 for (i = 0; i < MAX_DEV; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 ubd_add(i);
865 return 0;
866}
867
868late_initcall(ubd_init);
869
Jeff Dike91acb212005-10-10 23:10:32 -0400870int ubd_driver_init(void){
871 unsigned long stack;
872 int err;
873
874 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
875 if(global_openflags.s){
876 printk(KERN_INFO "ubd: Synchronous mode\n");
877 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
878 * enough. So use anyway the io thread. */
879 }
880 stack = alloc_stack(0, 0);
Jeff Dike6c29256c2006-03-27 01:14:37 -0800881 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
Jeff Dike91acb212005-10-10 23:10:32 -0400882 &thread_fd);
883 if(io_pid < 0){
Jeff Dike6c29256c2006-03-27 01:14:37 -0800884 printk(KERN_ERR
Jeff Dike91acb212005-10-10 23:10:32 -0400885 "ubd : Failed to start I/O thread (errno = %d) - "
886 "falling back to synchronous I/O\n", -io_pid);
887 io_pid = -1;
888 return(0);
889 }
Jeff Dike6c29256c2006-03-27 01:14:37 -0800890 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
Jeff Dike91acb212005-10-10 23:10:32 -0400891 SA_INTERRUPT, "ubd", ubd_dev);
892 if(err != 0)
893 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
Jeff Dikef4c57a72006-03-31 02:30:10 -0800894 return 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400895}
896
897device_initcall(ubd_driver_init);
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899static int ubd_open(struct inode *inode, struct file *filp)
900{
901 struct gendisk *disk = inode->i_bdev->bd_disk;
902 struct ubd *dev = disk->private_data;
903 int err = 0;
904
905 if(dev->count == 0){
906 err = ubd_open_dev(dev);
907 if(err){
908 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
909 disk->disk_name, dev->file, -err);
910 goto out;
911 }
912 }
913 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700914 set_disk_ro(disk, !dev->openflags.w);
915
916 /* This should no more be needed. And it didn't work anyway to exclude
917 * read-write remounting of filesystems.*/
918 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 if(--dev->count == 0) ubd_close(dev);
920 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700921 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 out:
923 return(err);
924}
925
926static int ubd_release(struct inode * inode, struct file * file)
927{
928 struct gendisk *disk = inode->i_bdev->bd_disk;
929 struct ubd *dev = disk->private_data;
930
931 if(--dev->count == 0)
932 ubd_close(dev);
933 return(0);
934}
935
Jeff Dike91acb212005-10-10 23:10:32 -0400936static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
937 __u64 *cow_offset, unsigned long *bitmap,
938 __u64 bitmap_offset, unsigned long *bitmap_words,
939 __u64 bitmap_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700940{
Jeff Dike91acb212005-10-10 23:10:32 -0400941 __u64 sector = io_offset >> 9;
942 int i, update_bitmap = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943
Jeff Dike91acb212005-10-10 23:10:32 -0400944 for(i = 0; i < length >> 9; i++){
945 if(cow_mask != NULL)
946 ubd_set_bit(i, (unsigned char *) cow_mask);
947 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
948 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
Jeff Dike91acb212005-10-10 23:10:32 -0400950 update_bitmap = 1;
951 ubd_set_bit(sector + i, (unsigned char *) bitmap);
952 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
Jeff Dike91acb212005-10-10 23:10:32 -0400954 if(!update_bitmap)
955 return;
956
957 *cow_offset = sector / (sizeof(unsigned long) * 8);
958
959 /* This takes care of the case where we're exactly at the end of the
960 * device, and *cow_offset + 1 is off the end. So, just back it up
961 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
962 * for the original diagnosis.
963 */
964 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
965 sizeof(unsigned long) - 1))
966 (*cow_offset)--;
967
968 bitmap_words[0] = bitmap[*cow_offset];
969 bitmap_words[1] = bitmap[*cow_offset + 1];
970
971 *cow_offset *= sizeof(unsigned long);
972 *cow_offset += bitmap_offset;
973}
974
975static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
976 __u64 bitmap_offset, __u64 bitmap_len)
977{
978 __u64 sector = req->offset >> 9;
979 int i;
980
981 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
982 panic("Operation too long");
983
984 if(req->op == UBD_READ) {
985 for(i = 0; i < req->length >> 9; i++){
986 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
Jeff Dike6c29256c2006-03-27 01:14:37 -0800987 ubd_set_bit(i, (unsigned char *)
Jeff Dike91acb212005-10-10 23:10:32 -0400988 &req->sector_mask);
989 }
990 }
991 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
992 &req->cow_offset, bitmap, bitmap_offset,
993 req->bitmap_words, bitmap_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994}
995
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996/* Called with ubd_io_lock held */
Jeff Dike91acb212005-10-10 23:10:32 -0400997static int prepare_request(struct request *req, struct io_thread_req *io_req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998{
999 struct gendisk *disk = req->rq_disk;
1000 struct ubd *dev = disk->private_data;
Jeff Dike91acb212005-10-10 23:10:32 -04001001 __u64 offset;
1002 int len;
1003
1004 if(req->rq_status == RQ_INACTIVE) return(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001006 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
Jeff Dike6c29256c2006-03-27 01:14:37 -08001008 printk("Write attempted on readonly ubd device %s\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 disk->disk_name);
Jeff Dike91acb212005-10-10 23:10:32 -04001010 end_request(req, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 return(1);
1012 }
1013
Jeff Dike91acb212005-10-10 23:10:32 -04001014 offset = ((__u64) req->sector) << 9;
1015 len = req->current_nr_sectors << 9;
1016
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1018 io_req->fds[1] = dev->fd;
Jeff Dike91acb212005-10-10 23:10:32 -04001019 io_req->cow_offset = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 io_req->offset = offset;
1021 io_req->length = len;
1022 io_req->error = 0;
Jeff Dike91acb212005-10-10 23:10:32 -04001023 io_req->sector_mask = 0;
1024
1025 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 io_req->offsets[0] = 0;
1027 io_req->offsets[1] = dev->cow.data_offset;
Jeff Dike91acb212005-10-10 23:10:32 -04001028 io_req->buffer = req->buffer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 io_req->sectorsize = 1 << 9;
1030
Jeff Dike91acb212005-10-10 23:10:32 -04001031 if(dev->cow.file != NULL)
1032 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1033 dev->cow.bitmap_len);
1034
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035 return(0);
1036}
1037
1038/* Called with ubd_io_lock held */
1039static void do_ubd_request(request_queue_t *q)
1040{
1041 struct io_thread_req io_req;
1042 struct request *req;
Jeff Dike91acb212005-10-10 23:10:32 -04001043 int err, n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044
Jeff Dike91acb212005-10-10 23:10:32 -04001045 if(thread_fd == -1){
1046 while((req = elv_next_request(q)) != NULL){
1047 err = prepare_request(req, &io_req);
1048 if(!err){
1049 do_io(&io_req);
1050 __ubd_finish(req, io_req.error);
1051 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 }
1053 }
Jeff Dike91acb212005-10-10 23:10:32 -04001054 else {
1055 if(do_ubd || (req = elv_next_request(q)) == NULL)
1056 return;
1057 err = prepare_request(req, &io_req);
1058 if(!err){
1059 do_ubd = ubd_handler;
1060 n = os_write_file(thread_fd, (char *) &io_req,
1061 sizeof(io_req));
1062 if(n != sizeof(io_req))
1063 printk("write to io thread failed, "
1064 "errno = %d\n", -n);
1065 }
1066 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067}
1068
Christoph Hellwiga885c8c2006-01-08 01:02:50 -08001069static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1070{
1071 struct ubd *dev = bdev->bd_disk->private_data;
1072
1073 geo->heads = 128;
1074 geo->sectors = 32;
1075 geo->cylinders = dev->size / (128 * 32 * 512);
1076 return 0;
1077}
1078
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079static int ubd_ioctl(struct inode * inode, struct file * file,
1080 unsigned int cmd, unsigned long arg)
1081{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1083 struct hd_driveid ubd_id = {
1084 .cyls = 0,
1085 .heads = 128,
1086 .sectors = 32,
1087 };
1088
1089 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 struct cdrom_volctrl volume;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 case HDIO_GET_IDENTITY:
1092 ubd_id.cyls = dev->size / (128 * 32 * 512);
1093 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1094 sizeof(ubd_id)))
1095 return(-EFAULT);
1096 return(0);
1097
1098 case CDROMVOLREAD:
1099 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1100 return(-EFAULT);
1101 volume.channel0 = 255;
1102 volume.channel1 = 255;
1103 volume.channel2 = 255;
1104 volume.channel3 = 255;
1105 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1106 return(-EFAULT);
1107 return(0);
1108 }
1109 return(-EINVAL);
1110}
1111
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001112static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113{
1114 struct uml_stat buf1, buf2;
1115 int err;
1116
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001117 if(from_cmdline == NULL)
1118 return 0;
1119 if(!strcmp(from_cmdline, from_cow))
1120 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121
1122 err = os_stat_file(from_cmdline, &buf1);
1123 if(err < 0){
1124 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001125 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 }
1127 err = os_stat_file(from_cow, &buf2);
1128 if(err < 0){
1129 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001130 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 }
1132 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001133 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134
1135 printk("Backing file mismatch - \"%s\" requested,\n"
1136 "\"%s\" specified in COW header of \"%s\"\n",
1137 from_cmdline, from_cow, cow);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001138 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139}
1140
1141static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1142{
1143 unsigned long modtime;
Paolo 'Blaisorblade' Giarrussofe1db502006-02-24 13:03:58 -08001144 unsigned long long actual;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 int err;
1146
1147 err = os_file_modtime(file, &modtime);
1148 if(err < 0){
1149 printk("Failed to get modification time of backing file "
1150 "\"%s\", err = %d\n", file, -err);
1151 return(err);
1152 }
1153
1154 err = os_file_size(file, &actual);
1155 if(err < 0){
1156 printk("Failed to get size of backing file \"%s\", "
1157 "err = %d\n", file, -err);
1158 return(err);
1159 }
1160
1161 if(actual != size){
1162 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1163 * the typecast.*/
1164 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1165 "file\n", (unsigned long long) size, actual);
1166 return(-EINVAL);
1167 }
1168 if(modtime != mtime){
1169 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1170 "file\n", mtime, modtime);
1171 return(-EINVAL);
1172 }
1173 return(0);
1174}
1175
/*
 * Read the COW bitmap from a file into a caller-supplied buffer.
 *
 * @fd:     file descriptor to read from
 * @buf:    destination buffer, at least @len bytes
 * @offset: file offset at which the bitmap starts
 * @len:    number of bytes to read
 *
 * Returns 0 once all @len bytes have been read, the negative error from
 * os_seek_file() or os_read_file() on failure, and -EIO if end of file is
 * reached first.  Partial reads are retried; a short read used to be
 * reported as success, leaving the tail of @buf unfilled.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	char *pos = buf;
	int n;

	n = os_seek_file(fd, offset);
	if (n < 0)
		return n;

	do {
		n = os_read_file(fd, pos, len);
		if (n < 0)
			return n;

		/* EOF with data still outstanding: the file is truncated. */
		if ((n == 0) && (len > 0))
			return -EIO;

		pos += n;
		len -= n;
	} while (len > 0);

	return 0;
}
1190
Jeff Dike6c29256c2006-03-27 01:14:37 -08001191int open_ubd_file(char *file, struct openflags *openflags, int shared,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 char **backing_file_out, int *bitmap_offset_out,
1193 unsigned long *bitmap_len_out, int *data_offset_out,
1194 int *create_cow_out)
1195{
1196 time_t mtime;
1197 unsigned long long size;
1198 __u32 version, align;
1199 char *backing_file;
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001200 int fd, err, sectorsize, asked_switch, mode = 0644;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201
1202 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001203 if (fd < 0) {
1204 if ((fd == -ENOENT) && (create_cow_out != NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 *create_cow_out = 1;
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001206 if (!openflags->w ||
1207 ((fd != -EROFS) && (fd != -EACCES)))
1208 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 openflags->w = 0;
1210 fd = os_open_file(file, *openflags, mode);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001211 if (fd < 0)
1212 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 }
1214
Jeff Dike6c29256c2006-03-27 01:14:37 -08001215 if(shared)
1216 printk("Not locking \"%s\" on the host\n", file);
1217 else {
1218 err = os_lock_file(fd, openflags->w);
1219 if(err < 0){
1220 printk("Failed to lock '%s', err = %d\n", file, -err);
1221 goto out_close;
1222 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 }
1224
Andreas Mohrd6e05ed2006-06-26 18:35:02 +02001225 /* Successful return case! */
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001226 if(backing_file_out == NULL)
1227 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
1229 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1230 &size, &sectorsize, &align, bitmap_offset_out);
1231 if(err && (*backing_file_out != NULL)){
1232 printk("Failed to read COW header from COW file \"%s\", "
1233 "errno = %d\n", file, -err);
1234 goto out_close;
1235 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001236 if(err)
1237 return(fd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001239 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001241 /* Allow switching only if no mismatch. */
1242 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 printk("Switching backing file to '%s'\n", *backing_file_out);
1244 err = write_cow_header(file, fd, *backing_file_out,
1245 sectorsize, align, &size);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001246 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 printk("Switch failed, errno = %d\n", -err);
Paolo 'Blaisorblade' Giarrusso4833aff2006-01-18 17:43:00 -08001248 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 }
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001250 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 *backing_file_out = backing_file;
1252 err = backing_file_mismatch(*backing_file_out, size, mtime);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001253 if (err)
1254 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 }
1256
1257 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1258 bitmap_len_out, data_offset_out);
1259
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001260 return fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001261 out_close:
1262 os_close_file(fd);
Paolo 'Blaisorblade' Giarrussoa374a482006-01-18 17:43:01 -08001263 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264}
1265
1266int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1267 int sectorsize, int alignment, int *bitmap_offset_out,
1268 unsigned long *bitmap_len_out, int *data_offset_out)
1269{
1270 int err, fd;
1271
1272 flags.c = 1;
Jeff Dike6c29256c2006-03-27 01:14:37 -08001273 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 if(fd < 0){
1275 err = fd;
1276 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1277 -err);
1278 goto out;
1279 }
1280
1281 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1282 bitmap_offset_out, bitmap_len_out,
1283 data_offset_out);
1284 if(!err)
1285 return(fd);
1286 os_close_file(fd);
1287 out:
1288 return(err);
1289}
1290
Jeff Dike91acb212005-10-10 23:10:32 -04001291static int update_bitmap(struct io_thread_req *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292{
Jeff Dike91acb212005-10-10 23:10:32 -04001293 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294
Jeff Dike91acb212005-10-10 23:10:32 -04001295 if(req->cow_offset == -1)
1296 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297
Jeff Dike91acb212005-10-10 23:10:32 -04001298 n = os_seek_file(req->fds[1], req->cow_offset);
1299 if(n < 0){
1300 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1301 return(1);
1302 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303
Jeff Dike91acb212005-10-10 23:10:32 -04001304 n = os_write_file(req->fds[1], &req->bitmap_words,
1305 sizeof(req->bitmap_words));
1306 if(n != sizeof(req->bitmap_words)){
1307 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1308 req->fds[1]);
1309 return(1);
1310 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001311
Jeff Dike91acb212005-10-10 23:10:32 -04001312 return(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313}
Jeff Dike91acb212005-10-10 23:10:32 -04001314
1315void do_io(struct io_thread_req *req)
1316{
1317 char *buf;
1318 unsigned long len;
1319 int n, nsectors, start, end, bit;
1320 int err;
1321 __u64 off;
1322
1323 nsectors = req->length / req->sectorsize;
1324 start = 0;
1325 do {
1326 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1327 end = start;
1328 while((end < nsectors) &&
1329 (ubd_test_bit(end, (unsigned char *)
1330 &req->sector_mask) == bit))
1331 end++;
1332
1333 off = req->offset + req->offsets[bit] +
1334 start * req->sectorsize;
1335 len = (end - start) * req->sectorsize;
1336 buf = &req->buffer[start * req->sectorsize];
1337
1338 err = os_seek_file(req->fds[bit], off);
1339 if(err < 0){
1340 printk("do_io - lseek failed : err = %d\n", -err);
1341 req->error = 1;
1342 return;
1343 }
1344 if(req->op == UBD_READ){
1345 n = 0;
1346 do {
1347 buf = &buf[n];
1348 len -= n;
1349 n = os_read_file(req->fds[bit], buf, len);
1350 if (n < 0) {
1351 printk("do_io - read failed, err = %d "
1352 "fd = %d\n", -n, req->fds[bit]);
1353 req->error = 1;
1354 return;
1355 }
1356 } while((n < len) && (n != 0));
1357 if (n < len) memset(&buf[n], 0, len - n);
1358 } else {
1359 n = os_write_file(req->fds[bit], buf, len);
1360 if(n != len){
1361 printk("do_io - write failed err = %d "
1362 "fd = %d\n", -n, req->fds[bit]);
1363 req->error = 1;
1364 return;
1365 }
1366 }
1367
1368 start = end;
1369 } while(start < nsectors);
1370
1371 req->error = update_bitmap(req);
1372}
1373
/* Descriptor the io thread reads requests from and writes results to.
 * Only start_io_thread changes it; that is called solely from ubd_init,
 * an initcall, so the change is serialized.
 */
int kernel_fd = -1;

/* Count of requests handled; modified only by the io thread */
int io_count = 0;
1381
1382int io_thread(void *arg)
1383{
1384 struct io_thread_req req;
1385 int n;
1386
1387 ignore_sigwinch_sig();
1388 while(1){
1389 n = os_read_file(kernel_fd, &req, sizeof(req));
1390 if(n != sizeof(req)){
1391 if(n < 0)
1392 printk("io_thread - read failed, fd = %d, "
1393 "err = %d\n", kernel_fd, -n);
1394 else {
1395 printk("io_thread - short read, fd = %d, "
1396 "length = %d\n", kernel_fd, n);
1397 }
1398 continue;
1399 }
1400 io_count++;
1401 do_io(&req);
1402 n = os_write_file(kernel_fd, &req, sizeof(req));
1403 if(n != sizeof(req))
1404 printk("io_thread - write failed, fd = %d, err = %d\n",
1405 kernel_fd, -n);
1406 }
Jeff Dike91acb212005-10-10 23:10:32 -04001407
Jeff Dike1b57e9c2006-01-06 00:18:49 -08001408 return 0;
1409}