/*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest
 * to talk to the Launcher or directly to another Guest.  It uses familiar
 * concepts of DMA and interrupts, plus some neat code stolen from
 * futexes... :*/

/* Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
| 22 | #include <linux/types.h> |
| 23 | #include <linux/futex.h> |
| 24 | #include <linux/jhash.h> |
| 25 | #include <linux/mm.h> |
| 26 | #include <linux/highmem.h> |
| 27 | #include <linux/uaccess.h> |
| 28 | #include "lg.h" |
| 29 | |
/* Hash table of all registered DMA buffers, keyed by futex key (see
 * hash() below).  61 buckets: a prime, so the modulo spreads keys well. */
static struct list_head dma_hash[61];
| 31 | |
| 32 | void lguest_io_init(void) |
| 33 | { |
| 34 | unsigned int i; |
| 35 | |
| 36 | for (i = 0; i < ARRAY_SIZE(dma_hash); i++) |
| 37 | INIT_LIST_HEAD(&dma_hash[i]); |
| 38 | } |
| 39 | |
| 40 | /* FIXME: allow multi-page lengths. */ |
| 41 | static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) |
| 42 | { |
| 43 | unsigned int i; |
| 44 | |
| 45 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
| 46 | if (!dma->len[i]) |
| 47 | return 1; |
| 48 | if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) |
| 49 | goto kill; |
| 50 | if (dma->len[i] > PAGE_SIZE) |
| 51 | goto kill; |
| 52 | /* We could do over a page, but is it worth it? */ |
| 53 | if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) |
| 54 | goto kill; |
| 55 | } |
| 56 | return 1; |
| 57 | |
| 58 | kill: |
| 59 | kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); |
| 60 | return 0; |
| 61 | } |
| 62 | |
| 63 | static unsigned int hash(const union futex_key *key) |
| 64 | { |
| 65 | return jhash2((u32*)&key->both.word, |
| 66 | (sizeof(key->both.word)+sizeof(key->both.ptr))/4, |
| 67 | key->both.offset) |
| 68 | % ARRAY_SIZE(dma_hash); |
| 69 | } |
| 70 | |
| 71 | static inline int key_eq(const union futex_key *a, const union futex_key *b) |
| 72 | { |
| 73 | return (a->both.word == b->both.word |
| 74 | && a->both.ptr == b->both.ptr |
| 75 | && a->both.offset == b->both.offset); |
| 76 | } |
| 77 | |
/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */
/* Remove a registered DMA array from the hash.  Clearing ->interrupt
 * marks the slot free (bind_dma scans for interrupt == 0), then we unhook
 * it from its bucket and release the futex-key reference that bind_dma
 * took via get_futex_key_refs(). */
static void unlink_dma(struct lguest_dma_info *dmainfo)
{
	/* Hash-table manipulation is only safe under the big lguest lock. */
	BUG_ON(!mutex_is_locked(&lguest_lock));
	dmainfo->interrupt = 0;
	list_del(&dmainfo->list);
	drop_futex_key_refs(&dmainfo->key);
}
| 86 | |
| 87 | static int unbind_dma(struct lguest *lg, |
| 88 | const union futex_key *key, |
| 89 | unsigned long dmas) |
| 90 | { |
| 91 | int i, ret = 0; |
| 92 | |
| 93 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
| 94 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { |
| 95 | unlink_dma(&lg->dma[i]); |
| 96 | ret = 1; |
| 97 | break; |
| 98 | } |
| 99 | } |
| 100 | return ret; |
| 101 | } |
| 102 | |
/* Register (or, with interrupt == 0, unregister) an array of "numdmas"
 * lguest_dma structs at Guest address "dmas", keyed by the futex key for
 * Guest address "ukey".  When a peer later sends to that key, one of
 * these buffers gets filled and "interrupt" is raised on this Guest.
 * Returns 1 on success, 0 on failure (irq out of range, no free slot, or
 * no matching binding to remove). */
int bind_dma(struct lguest *lg,
	     unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt)
{
	unsigned int i;
	int ret = 0;
	union futex_key key;
	struct rw_semaphore *fshared = &current->mm->mmap_sem;

	/* interrupt == 0 means "unbind", so 0 can never be a real irq here. */
	if (interrupt >= LGUEST_IRQS)
		return 0;

	/* lguest_lock protects dma_hash and the lg->dma slots; mmap_sem is
	 * required by get_futex_key(). */
	mutex_lock(&lguest_lock);
	down_read(fshared);
	if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
		kill_guest(lg, "bad dma key %#lx", ukey);
		goto unlock;
	}
	/* Pin the key's backing object (e.g. inode of a shared mapping)
	 * for as long as the binding exists. */
	get_futex_key_refs(&key);

	if (interrupt == 0)
		ret = unbind_dma(lg, &key, dmas);
	else {
		/* Claim the first free slot (interrupt == 0 marks free). */
		for (i = 0; i < LGUEST_MAX_DMA; i++) {
			if (lg->dma[i].interrupt)
				continue;

			lg->dma[i].dmas = dmas;
			lg->dma[i].num_dmas = numdmas;
			lg->dma[i].next_dma = 0;
			lg->dma[i].key = key;
			lg->dma[i].guestid = lg->guestid;
			lg->dma[i].interrupt = interrupt;
			list_add(&lg->dma[i].list, &dma_hash[hash(&key)]);
			ret = 1;
			/* Success keeps the key reference: skip the drop. */
			goto unlock;
		}
	}
	/* Unbind path, or no free slot: give the key reference back. */
	drop_futex_key_refs(&key);
unlock:
	up_read(fshared);
	mutex_unlock(&lguest_lock);
	return ret;
}
| 146 | |
| 147 | /* lgread from another guest */ |
| 148 | static int lgread_other(struct lguest *lg, |
| 149 | void *buf, u32 addr, unsigned bytes) |
| 150 | { |
| 151 | if (!lguest_address_ok(lg, addr, bytes) |
| 152 | || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { |
| 153 | memset(buf, 0, bytes); |
| 154 | kill_guest(lg, "bad address in registered DMA struct"); |
| 155 | return 0; |
| 156 | } |
| 157 | return 1; |
| 158 | } |
| 159 | |
| 160 | /* lgwrite to another guest */ |
| 161 | static int lgwrite_other(struct lguest *lg, u32 addr, |
| 162 | const void *buf, unsigned bytes) |
| 163 | { |
| 164 | if (!lguest_address_ok(lg, addr, bytes) |
| 165 | || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) |
| 166 | != bytes)) { |
| 167 | kill_guest(lg, "bad address writing to registered DMA"); |
| 168 | return 0; |
| 169 | } |
| 170 | return 1; |
| 171 | } |
| 172 | |
/* Copy from the current (sending) Guest's user memory described by "src"
 * into the destination Guest's pages — already pinned by do_dma and passed
 * in "pages[]", one per destination section — described by "dst".
 * check_dma_list() has already guaranteed each section fits within one
 * page, so pages[di] covers all of destination section "di".
 * Returns total bytes copied, or 0 if a source pointer was bad. */
static u32 copy_data(struct lguest *srclg,
		     const struct lguest_dma *src,
		     const struct lguest_dma *dst,
		     struct page *pages[])
{
	/* si/di index the current source/destination sections; srcoff/dstoff
	 * track how far into each we have consumed. */
	unsigned int totlen, si, di, srcoff, dstoff;
	/* Kernel mapping of the current destination page; NULL = unmapped. */
	void *maddr = NULL;

	totlen = 0;
	si = di = 0;
	srcoff = dstoff = 0;
	/* Keep copying until either list runs out (zero len ends a list). */
	while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
	       && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
		/* Largest chunk the remainders of both sections allow. */
		u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);

		/* Map the destination page lazily, once per section. */
		if (!maddr)
			maddr = kmap(pages[di]);

		/* FIXME: This is not completely portable, since
		   archs do different things for copy_to_user_page. */
		if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
				   (void __user *)src->addr[si], len) != 0) {
			/* The sending Guest handed us a bad pointer:
			 * report zero bytes copied. */
			kill_guest(srclg, "bad address in sending DMA");
			totlen = 0;
			break;
		}

		totlen += len;
		srcoff += len;
		dstoff += len;
		/* Source section exhausted: advance to the next one. */
		if (srcoff == src->len[si]) {
			si++;
			srcoff = 0;
		}
		/* Destination section exhausted: unmap its page, advance. */
		if (dstoff == dst->len[di]) {
			kunmap(pages[di]);
			maddr = NULL;
			di++;
			dstoff = 0;
		}
	}

	/* The loop may exit with the current page still mapped. */
	if (maddr)
		kunmap(pages[di]);

	return totlen;
}
| 220 | |
/* Src is us, ie. current. */
/* Perform one transfer from the current Guest's "src" descriptor into
 * "dst" belonging to "dstlg".  The destination pages are pinned with
 * get_user_pages (writable, forced) before copying and always released
 * afterwards.  Returns bytes copied; 0 on any failure. */
static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src,
		  struct lguest *dstlg, const struct lguest_dma *dst)
{
	int i;
	u32 ret;
	struct page *pages[LGUEST_MAX_DMA_SECTIONS];

	/* Validate both lists; check_dma_list kills the offending Guest. */
	if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
		return 0;

	/* First get the destination pages */
	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
		/* A zero length terminates the list. */
		if (dst->len[i] == 0)
			break;
		if (get_user_pages(dstlg->tsk, dstlg->mm,
				   dst->addr[i], 1, 1, 1, pages+i, NULL)
		    != 1) {
			kill_guest(dstlg, "Error mapping DMA pages");
			ret = 0;
			goto drop_pages;
		}
	}

	/* Now copy until we run out of src or dst. */
	ret = copy_data(srclg, src, dst, pages);

drop_pages:
	/* "i" is the count of successfully pinned pages (a failed index was
	 * never pinned), so this releases exactly pages[0..i-1]. */
	while (--i >= 0)
		put_page(pages[i]);
	return ret;
}
| 253 | |
/* The current Guest has a pending DMA at its address "udma"; "dst" is a
 * matching registered buffer set on another Guest.  Scan dst's ring for a
 * free buffer, copy into it, publish the used length, and raise dst's
 * interrupt.  Returns 1 if every destination buffer was already full
 * ("empty" from the sender's point of view — send_dma uses this to retry
 * once), 0 otherwise, including on error. */
static int dma_transfer(struct lguest *srclg,
			unsigned long udma,
			struct lguest_dma_info *dst)
{
	struct lguest_dma dst_dma, src_dma;
	struct lguest *dstlg;
	u32 i, dma = 0;

	dstlg = &lguests[dst->guestid];
	/* Get our dma list. */
	lgread(srclg, &src_dma, udma, sizeof(src_dma));

	/* We can't deadlock against them dmaing to us, because this
	 * is all under the lguest_lock. */
	down_read(&dstlg->mm->mmap_sem);

	/* Round-robin from next_dma looking for an unused buffer
	 * (used_len == 0). */
	for (i = 0; i < dst->num_dmas; i++) {
		dma = (dst->next_dma + i) % dst->num_dmas;
		if (!lgread_other(dstlg, &dst_dma,
				  dst->dmas + dma * sizeof(struct lguest_dma),
				  sizeof(dst_dma))) {
			goto fail;
		}
		if (!dst_dma.used_len)
			break;
	}
	/* i == num_dmas means every buffer was in use: skip the copy but
	 * still wake the receiver below so it can restock. */
	if (i != dst->num_dmas) {
		unsigned long used_lenp;
		unsigned int ret;

		ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
		/* Put used length in src. */
		lgwrite_u32(srclg,
			    udma+offsetof(struct lguest_dma, used_len), ret);
		/* Zero bytes with a non-empty source means do_dma failed
		 * (some Guest has already been killed). */
		if (ret == 0 && src_dma.len[0] != 0)
			goto fail;

		/* Make sure destination sees contents before length. */
		wmb();
		used_lenp = dst->dmas
			+ dma * sizeof(struct lguest_dma)
			+ offsetof(struct lguest_dma, used_len);
		lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
		/* Advance the cursor past the buffer we just filled;
		 * the modulo above keeps it in range. */
		dst->next_dma++;
	}
	up_read(&dstlg->mm->mmap_sem);

	/* Do this last so dst doesn't simply sleep on lock. */
	set_bit(dst->interrupt, dstlg->irqs_pending);
	wake_up_process(dstlg->tsk);
	return i == dst->num_dmas;

fail:
	up_read(&dstlg->mm->mmap_sem);
	return 0;
}
| 310 | |
/* The Guest wants to send the DMA at its address "udma" to whoever is
 * listening on the futex key for its address "ukey".  A shared-mapping
 * key may be bound by another Guest; a private key always goes to our
 * Launcher via the pending_dma fields. */
void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma)
{
	union futex_key key;
	/* 0 = not tried; 1 = receiver was full, retry once; 2 = retried. */
	int empty = 0;
	struct rw_semaphore *fshared = &current->mm->mmap_sem;

again:
	mutex_lock(&lguest_lock);
	down_read(fshared);
	if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
		kill_guest(lg, "bad sending DMA key");
		goto unlock;
	}
	/* Shared mapping? Look for other guests... */
	if (key.shared.offset & 1) {
		struct lguest_dma_info *i;
		/* First *other* Guest bound to this key gets the transfer. */
		list_for_each_entry(i, &dma_hash[hash(&key)], list) {
			if (i->guestid == lg->guestid)
				continue;
			if (!key_eq(&key, &i->key))
				continue;

			/* Returns 1 if the receiver had no free buffers. */
			empty += dma_transfer(lg, udma, i);
			break;
		}
		if (empty == 1) {
			/* Give any recipients one chance to restock. */
			up_read(&current->mm->mmap_sem);
			mutex_unlock(&lguest_lock);
			empty++;
			goto again;
		}
	} else {
		/* Private mapping: tell our userspace. */
		lg->dma_is_pending = 1;
		lg->pending_dma = udma;
		lg->pending_key = ukey;
	}
unlock:
	up_read(fshared);
	mutex_unlock(&lguest_lock);
}
| 353 | |
| 354 | void release_all_dma(struct lguest *lg) |
| 355 | { |
| 356 | unsigned int i; |
| 357 | |
| 358 | BUG_ON(!mutex_is_locked(&lguest_lock)); |
| 359 | |
| 360 | down_read(&lg->mm->mmap_sem); |
| 361 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
| 362 | if (lg->dma[i].interrupt) |
| 363 | unlink_dma(&lg->dma[i]); |
| 364 | } |
| 365 | up_read(&lg->mm->mmap_sem); |
| 366 | } |
| 367 | |
/* Userspace wants a dma buffer from this guest. */
/* Find this Guest's own binding for the futex key of "ukey" and return
 * the Guest address of its next available lguest_dma (used_len == 0),
 * storing the binding's interrupt number in *interrupt.  Returns 0 when
 * no binding matched.
 * NOTE(review): if every buffer in the ring has non-zero used_len, this
 * returns the address of the LAST buffer scanned rather than 0 —
 * presumably the caller re-checks used_len itself; confirm against the
 * Launcher code. */
unsigned long get_dma_buffer(struct lguest *lg,
			     unsigned long ukey, unsigned long *interrupt)
{
	unsigned long ret = 0;
	union futex_key key;
	struct lguest_dma_info *i;
	struct rw_semaphore *fshared = &current->mm->mmap_sem;

	mutex_lock(&lguest_lock);
	down_read(fshared);
	if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) {
		kill_guest(lg, "bad registered DMA buffer");
		goto unlock;
	}
	/* Only this Guest's own registration counts here (peers' bindings
	 * for the same key are skipped). */
	list_for_each_entry(i, &dma_hash[hash(&key)], list) {
		if (key_eq(&key, &i->key) && i->guestid == lg->guestid) {
			unsigned int j;
			/* Walk the ring until an unused buffer turns up. */
			for (j = 0; j < i->num_dmas; j++) {
				struct lguest_dma dma;

				ret = i->dmas + j * sizeof(struct lguest_dma);
				lgread(lg, &dma, ret, sizeof(dma));
				if (dma.used_len == 0)
					break;
			}
			*interrupt = i->interrupt;
			break;
		}
	}
unlock:
	up_read(fshared);
	mutex_unlock(&lguest_lock);
	return ret;
}
| 403 | |