/*D:400
 * The Guest block driver
 *
 * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
 * The mechanism is simple: we place the information about the request in the
 * device page, then use SEND_DMA (containing the data for a write, or an empty
 * "ping" DMA for a read).
 :*/
/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
| 25 | //#define DEBUG |
| 26 | #include <linux/init.h> |
| 27 | #include <linux/types.h> |
| 28 | #include <linux/blkdev.h> |
| 29 | #include <linux/interrupt.h> |
| 30 | #include <linux/lguest_bus.h> |
| 31 | |
| 32 | static char next_block_index = 'a'; |
| 33 | |
/*D:420 Here is the structure which holds all the information we need about
 * each Guest block device.
 *
 * I'm sure at this stage, you're wondering "hey, where was the adventure I was
 * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
 * my blog".  I think Real adventures have boring bits, too, and you're in the
 * middle of one.  But it gets better.  Just not quite yet. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 41 | struct blockdev |
| 42 | { |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 43 | /* The block queue infrastructure wants a spinlock: it is held while it |
| 44 | * calls our block request function. We grab it in our interrupt |
| 45 | * handler so the responses don't mess with new requests. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 46 | spinlock_t lock; |
| 47 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 48 | /* The disk structure registered with kernel. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 49 | struct gendisk *disk; |
| 50 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 51 | /* The major device number for this disk, and the interrupt. We only |
| 52 | * really keep them here for completeness; we'd need them if we |
| 53 | * supported device unplugging. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 54 | int major; |
| 55 | int irq; |
| 56 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 57 | /* The physical address of this device's memory page */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 58 | unsigned long phys_addr; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 59 | /* The mapped memory page for convenient acces. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 60 | struct lguest_block_page *lb_page; |
| 61 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 62 | /* We only have a single request outstanding at a time: this is it. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 63 | struct lguest_dma dma; |
| 64 | struct request *req; |
| 65 | }; |
| 66 | |
/*D:495 We originally used end_request() throughout the driver, but it turns
 * out that end_request() is deprecated, and doesn't actually end the request
 * (which seems like a good reason to deprecate it!).  It simply ends the first
 * bio.  So if we had 3 bios in a "struct request" we would do all 3,
 * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
 * work as we needed to do.
 *
 * This reinforced to me that I do not understand the block layer.
 *
 * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
 * request.  This improved disk speed by 130%. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 78 | static void end_entire_request(struct request *req, int uptodate) |
| 79 | { |
| 80 | if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) |
| 81 | BUG(); |
| 82 | add_disk_randomness(req->rq_disk); |
| 83 | blkdev_dequeue_request(req); |
| 84 | end_that_request_last(req, uptodate); |
| 85 | } |
| 86 | |
/* I'm told there are only two stories in the world worth telling: love and
 * hate.  So there used to be a love scene here like this:
 *
 *  Launcher:	We could make beautiful I/O together, you and I.
 *  Guest:	My, that's a big disk!
 *
 * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
| 94 | |
/*D:490 This is the interrupt handler, called when a block read or write has
 * been completed for us. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 97 | static irqreturn_t lgb_irq(int irq, void *_bd) |
| 98 | { |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 99 | /* We handed our "struct blockdev" as the argument to request_irq(), so |
| 100 | * it is passed through to us here. This tells us which device we're |
| 101 | * dealing with in case we have more than one. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 102 | struct blockdev *bd = _bd; |
| 103 | unsigned long flags; |
| 104 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 105 | /* We weren't doing anything? Strange, but could happen if we shared |
| 106 | * interrupts (we don't!). */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 107 | if (!bd->req) { |
| 108 | pr_debug("No work!\n"); |
| 109 | return IRQ_NONE; |
| 110 | } |
| 111 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 112 | /* Not done yet? That's equally strange. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 113 | if (!bd->lb_page->result) { |
| 114 | pr_debug("No result!\n"); |
| 115 | return IRQ_NONE; |
| 116 | } |
| 117 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 118 | /* We have to grab the lock before ending the request. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 119 | spin_lock_irqsave(&bd->lock, flags); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 120 | /* "result" is 1 for success, 2 for failure: end_entire_request() wants |
| 121 | * to know whether this succeeded or not. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 122 | end_entire_request(bd->req, bd->lb_page->result == 1); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 123 | /* Clear out request, it's done. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 124 | bd->req = NULL; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 125 | /* Reset incoming DMA for next time. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 126 | bd->dma.used_len = 0; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 127 | /* Ready for more reads or writes */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 128 | blk_start_queue(bd->disk->queue); |
| 129 | spin_unlock_irqrestore(&bd->lock, flags); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 130 | |
| 131 | /* The interrupt was for us, we dealt with it. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 132 | return IRQ_HANDLED; |
| 133 | } |
| 134 | |
/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
 * each of which contains "struct bio_vec"s, each of which contains a page, an
 * offset and a length.
 *
 * Fortunately there are iterators to help us walk through the "struct
 * request".  Even more fortunately, there were plenty of places to steal the
 * code from.  We pack the "struct request" into our "struct lguest_dma" and
 * return the total length. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 143 | static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma) |
| 144 | { |
| 145 | unsigned int i = 0, idx, len = 0; |
| 146 | struct bio *bio; |
| 147 | |
| 148 | rq_for_each_bio(bio, req) { |
| 149 | struct bio_vec *bvec; |
| 150 | bio_for_each_segment(bvec, bio, idx) { |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 151 | /* We told the block layer not to give us too many. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 152 | BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 153 | /* If we had a zero-length segment, it would look like |
| 154 | * the end of the data referred to by the "struct |
| 155 | * lguest_dma", so make sure that doesn't happen. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 156 | BUG_ON(!bvec->bv_len); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 157 | /* Convert page & offset to a physical address */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 158 | dma->addr[i] = page_to_phys(bvec->bv_page) |
| 159 | + bvec->bv_offset; |
| 160 | dma->len[i] = bvec->bv_len; |
| 161 | len += bvec->bv_len; |
| 162 | i++; |
| 163 | } |
| 164 | } |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 165 | /* If the array isn't full, we mark the end with a 0 length */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 166 | if (i < LGUEST_MAX_DMA_SECTIONS) |
| 167 | dma->len[i] = 0; |
| 168 | return len; |
| 169 | } |
| 170 | |
/* This creates an empty DMA, useful for prodding the Host without sending data
 * (ie. when we want to do a read) */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 173 | static void empty_dma(struct lguest_dma *dma) |
| 174 | { |
| 175 | dma->len[0] = 0; |
| 176 | } |
| 177 | |
/*D:470 Setting up a request is fairly easy: */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 179 | static void setup_req(struct blockdev *bd, |
| 180 | int type, struct request *req, struct lguest_dma *dma) |
| 181 | { |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 182 | /* The type is 1 (write) or 0 (read). */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 183 | bd->lb_page->type = type; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 184 | /* The sector on disk where the read or write starts. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 185 | bd->lb_page->sector = req->sector; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 186 | /* The result is initialized to 0 (unfinished). */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 187 | bd->lb_page->result = 0; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 188 | /* The current request (so we can end it in the interrupt handler). */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 189 | bd->req = req; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 190 | /* The number of bytes: returned as a side-effect of req_to_dma(), |
| 191 | * which packs the block layer's "struct request" into our "struct |
| 192 | * lguest_dma" */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 193 | bd->lb_page->bytes = req_to_dma(req, dma); |
| 194 | } |
| 195 | |
/*D:450 Write is pretty straightforward: we pack the request into a "struct
 * lguest_dma", then use SEND_DMA to send the request. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 198 | static void do_write(struct blockdev *bd, struct request *req) |
| 199 | { |
| 200 | struct lguest_dma send; |
| 201 | |
| 202 | pr_debug("lgb: WRITE sector %li\n", (long)req->sector); |
| 203 | setup_req(bd, 1, req, &send); |
| 204 | |
| 205 | lguest_send_dma(bd->phys_addr, &send); |
| 206 | } |
| 207 | |
/* Read is similar to write, except we pack the request into our receive
 * "struct lguest_dma" and send through an empty DMA just to tell the Host that
 * there's a request pending. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 211 | static void do_read(struct blockdev *bd, struct request *req) |
| 212 | { |
| 213 | struct lguest_dma ping; |
| 214 | |
| 215 | pr_debug("lgb: READ sector %li\n", (long)req->sector); |
| 216 | setup_req(bd, 0, req, &bd->dma); |
| 217 | |
| 218 | empty_dma(&ping); |
| 219 | lguest_send_dma(bd->phys_addr, &ping); |
| 220 | } |
| 221 | |
/*D:440 This is where requests come in: we get handed the request queue and
 * are expected to pull a "struct request" off it until we've finished them or
 * we're waiting for a reply: */
Jens Axboe | 165125e | 2007-07-24 09:28:11 +0200 | [diff] [blame] | 225 | static void do_lgb_request(struct request_queue *q) |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 226 | { |
| 227 | struct blockdev *bd; |
| 228 | struct request *req; |
| 229 | |
| 230 | again: |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 231 | /* This sometimes returns NULL even on the very first time around. I |
| 232 | * wonder if it's something to do with letting elves handle the request |
| 233 | * queue... */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 234 | req = elv_next_request(q); |
| 235 | if (!req) |
| 236 | return; |
| 237 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 238 | /* We attached the struct blockdev to the disk: get it back */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 239 | bd = req->rq_disk->private_data; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 240 | /* Sometimes we get repeated requests after blk_stop_queue(), but we |
| 241 | * can only handle one at a time. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 242 | if (bd->req) |
| 243 | return; |
| 244 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 245 | /* We only do reads and writes: no tricky business! */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 246 | if (!blk_fs_request(req)) { |
| 247 | pr_debug("Got non-command 0x%08x\n", req->cmd_type); |
| 248 | req->errors++; |
| 249 | end_entire_request(req, 0); |
| 250 | goto again; |
| 251 | } |
| 252 | |
| 253 | if (rq_data_dir(req) == WRITE) |
| 254 | do_write(bd, req); |
| 255 | else |
| 256 | do_read(bd, req); |
| 257 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 258 | /* We've put out the request, so stop any more coming in until we get |
| 259 | * an interrupt, which takes us to lgb_irq() to re-enable the queue. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 260 | blk_stop_queue(q); |
| 261 | } |
| 262 | |
/*D:430 This is the "struct block_device_operations" we attach to the disk at
 * the end of lguestblk_probe().  It doesn't seem to want much. */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 265 | static struct block_device_operations lguestblk_fops = { |
| 266 | .owner = THIS_MODULE, |
| 267 | }; |
| 268 | |
/*D:425 Setting up a disk device seems to involve a lot of code.  I'm not sure
 * quite why.  I do know that the IDE code sent two or three of the maintainers
 * insane, perhaps this is the fringe of the same disease?
 *
 * As in the console code, the probe function gets handed the generic
 * lguest_device from lguest_bus.c: */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 275 | static int lguestblk_probe(struct lguest_device *lgdev) |
| 276 | { |
| 277 | struct blockdev *bd; |
| 278 | int err; |
| 279 | int irqflags = IRQF_SHARED; |
| 280 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 281 | /* First we allocate our own "struct blockdev" and initialize the easy |
| 282 | * fields. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 283 | bd = kmalloc(sizeof(*bd), GFP_KERNEL); |
| 284 | if (!bd) |
| 285 | return -ENOMEM; |
| 286 | |
| 287 | spin_lock_init(&bd->lock); |
| 288 | bd->irq = lgdev_irq(lgdev); |
| 289 | bd->req = NULL; |
| 290 | bd->dma.used_len = 0; |
| 291 | bd->dma.len[0] = 0; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 292 | /* The descriptor in the lguest_devices array provided by the Host |
| 293 | * gives the Guest the physical page number of the device's page. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 294 | bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT); |
| 295 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 296 | /* We use lguest_map() to get a pointer to the device page */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 297 | bd->lb_page = lguest_map(bd->phys_addr, 1); |
| 298 | if (!bd->lb_page) { |
| 299 | err = -ENOMEM; |
| 300 | goto out_free_bd; |
| 301 | } |
| 302 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 303 | /* We need a major device number: 0 means "assign one dynamically". */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 304 | bd->major = register_blkdev(0, "lguestblk"); |
| 305 | if (bd->major < 0) { |
| 306 | err = bd->major; |
| 307 | goto out_unmap; |
| 308 | } |
| 309 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 310 | /* This allocates a "struct gendisk" where we pack all the information |
Rusty Russell | 9ef7ad2 | 2007-08-17 14:05:27 +1000 | [diff] [blame] | 311 | * about the disk which the rest of Linux sees. The argument is the |
| 312 | * number of minor devices desired: we need one minor for the main |
| 313 | * disk, and one for each partition. Of course, we can't possibly know |
| 314 | * how many partitions are on the disk (add_disk does that). |
| 315 | */ |
| 316 | bd->disk = alloc_disk(16); |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 317 | if (!bd->disk) { |
| 318 | err = -ENOMEM; |
| 319 | goto out_unregister_blkdev; |
| 320 | } |
| 321 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 322 | /* Every disk needs a queue for requests to come in: we set up the |
| 323 | * queue with a callback function (the core of our driver) and the lock |
| 324 | * to use. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 325 | bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock); |
| 326 | if (!bd->disk->queue) { |
| 327 | err = -ENOMEM; |
| 328 | goto out_put_disk; |
| 329 | } |
| 330 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 331 | /* We can only handle a certain number of pointers in our SEND_DMA |
| 332 | * call, so we set that with blk_queue_max_hw_segments(). This is not |
| 333 | * to be confused with blk_queue_max_phys_segments() of course! I |
| 334 | * know, who could possibly confuse the two? |
| 335 | * |
| 336 | * Well, it's simple to tell them apart: this one seems to work and the |
| 337 | * other one didn't. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 338 | blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 339 | |
| 340 | /* Due to technical limitations of our Host (and simple coding) we |
| 341 | * can't have a single buffer which crosses a page boundary. Tell it |
| 342 | * here. This means that our maximum request size is 16 |
| 343 | * (LGUEST_MAX_DMA_SECTIONS) pages. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 344 | blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1); |
| 345 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 346 | /* We name our disk: this becomes the device name when udev does its |
| 347 | * magic thing and creates the device node, such as /dev/lgba. |
| 348 | * next_block_index is a global which starts at 'a'. Unfortunately |
| 349 | * this simple increment logic means that the 27th disk will be called |
| 350 | * "/dev/lgb{". In that case, I recommend having at least 29 disks, so |
| 351 | * your /dev directory will be balanced. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 352 | sprintf(bd->disk->disk_name, "lgb%c", next_block_index++); |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 353 | |
| 354 | /* We look to the device descriptor again to see if this device's |
| 355 | * interrupts are expected to be random. If they are, we tell the irq |
| 356 | * subsystem. At the moment this bit is always set. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 357 | if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) |
| 358 | irqflags |= IRQF_SAMPLE_RANDOM; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 359 | |
| 360 | /* Now we have the name and irqflags, we can request the interrupt; we |
| 361 | * give it the "struct blockdev" we have set up to pass to lgb_irq() |
| 362 | * when there is an interrupt. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 363 | err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd); |
| 364 | if (err) |
| 365 | goto out_cleanup_queue; |
| 366 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 367 | /* We bind our one-entry DMA pool to the key for this block device so |
| 368 | * the Host can reply to our requests. The key is equal to the |
| 369 | * physical address of the device's page, which is conveniently |
| 370 | * unique. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 371 | err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq); |
| 372 | if (err) |
| 373 | goto out_free_irq; |
| 374 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 375 | /* We finish our disk initialization and add the disk to the system. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 376 | bd->disk->major = bd->major; |
| 377 | bd->disk->first_minor = 0; |
| 378 | bd->disk->private_data = bd; |
| 379 | bd->disk->fops = &lguestblk_fops; |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 380 | /* This is initialized to the disk size by the Launcher. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 381 | set_capacity(bd->disk, bd->lb_page->num_sectors); |
| 382 | add_disk(bd->disk); |
| 383 | |
| 384 | printk(KERN_INFO "%s: device %i at major %d\n", |
| 385 | bd->disk->disk_name, lgdev->index, bd->major); |
| 386 | |
Rusty Russell | e2c9784 | 2007-07-26 10:41:03 -0700 | [diff] [blame] | 387 | /* We don't need to keep the "struct blockdev" around, but if we ever |
| 388 | * implemented device removal, we'd need this. */ |
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 389 | lgdev->private = bd; |
| 390 | return 0; |
| 391 | |
| 392 | out_free_irq: |
| 393 | free_irq(bd->irq, bd); |
| 394 | out_cleanup_queue: |
| 395 | blk_cleanup_queue(bd->disk->queue); |
| 396 | out_put_disk: |
| 397 | put_disk(bd->disk); |
| 398 | out_unregister_blkdev: |
| 399 | unregister_blkdev(bd->major, "lguestblk"); |
| 400 | out_unmap: |
| 401 | lguest_unmap(bd->lb_page); |
| 402 | out_free_bd: |
| 403 | kfree(bd); |
| 404 | return err; |
| 405 | } |
| 406 | |
/*D:410 The boilerplate code for registering the lguest block driver is just
 * like the console: */
Rusty Russell | b754416 | 2007-07-19 01:49:29 -0700 | [diff] [blame] | 409 | static struct lguest_driver lguestblk_drv = { |
| 410 | .name = "lguestblk", |
| 411 | .owner = THIS_MODULE, |
| 412 | .device_type = LGUEST_DEVICE_T_BLOCK, |
| 413 | .probe = lguestblk_probe, |
| 414 | }; |
| 415 | |
| 416 | static __init int lguestblk_init(void) |
| 417 | { |
| 418 | return register_lguest_driver(&lguestblk_drv); |
| 419 | } |
| 420 | module_init(lguestblk_init); |
| 421 | |
| 422 | MODULE_DESCRIPTION("Lguest block driver"); |
| 423 | MODULE_LICENSE("GPL"); |