blob: fb30cffe03385565f87508060aefe59ec05e77d2 [file] [log] [blame]
Alex Zeffertt1107ba82009-01-07 18:07:11 -08001/*
2 * Driver giving user-space access to the kernel's xenbus connection
3 * to xenstore.
4 *
5 * Copyright (c) 2005, Christian Limpach
6 * Copyright (c) 2005, Rusty Russell, IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 *
32 * Changes:
33 * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
34 * and /proc/xen compatibility mount point.
35 * Turned xenfs into a loadable module.
36 */
37
38#include <linux/kernel.h>
39#include <linux/errno.h>
40#include <linux/uio.h>
41#include <linux/notifier.h>
42#include <linux/wait.h>
43#include <linux/fs.h>
44#include <linux/poll.h>
45#include <linux/mutex.h>
Alexey Dobriyana99bbaf2009-10-04 16:11:37 +040046#include <linux/sched.h>
Alex Zeffertt1107ba82009-01-07 18:07:11 -080047#include <linux/spinlock.h>
48#include <linux/mount.h>
49#include <linux/pagemap.h>
50#include <linux/uaccess.h>
51#include <linux/init.h>
52#include <linux/namei.h>
53#include <linux/string.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090054#include <linux/slab.h>
Bastian Blank2fb36832011-12-10 19:29:47 +010055#include <linux/miscdevice.h>
56#include <linux/module.h>
Alex Zeffertt1107ba82009-01-07 18:07:11 -080057
Bastian Blank2fb36832011-12-10 19:29:47 +010058#include "xenbus_comms.h"
Alex Zeffertt1107ba82009-01-07 18:07:11 -080059
60#include <xen/xenbus.h>
61#include <asm/xen/hypervisor.h>
62
Bastian Blank2fb36832011-12-10 19:29:47 +010063MODULE_LICENSE("GPL");
64
Alex Zeffertt1107ba82009-01-07 18:07:11 -080065/*
66 * An element of a list of outstanding transactions, for which we're
67 * still waiting a reply.
68 */
69struct xenbus_transaction_holder {
70 struct list_head list;
71 struct xenbus_transaction handle;
72};
73
74/*
75 * A buffer of data on the queue.
76 */
77struct read_buffer {
78 struct list_head list;
79 unsigned int cons;
80 unsigned int len;
81 char msg[];
82};
83
84struct xenbus_file_priv {
85 /*
86 * msgbuffer_mutex is held while partial requests are built up
87 * and complete requests are acted on. It therefore protects
88 * the "transactions" and "watches" lists, and the partial
89 * request length and buffer.
90 *
91 * reply_mutex protects the reply being built up to return to
92 * usermode. It nests inside msgbuffer_mutex but may be held
93 * alone during a watch callback.
94 */
95 struct mutex msgbuffer_mutex;
96
97 /* In-progress transactions */
98 struct list_head transactions;
99
100 /* Active watches. */
101 struct list_head watches;
102
103 /* Partial request. */
104 unsigned int len;
105 union {
106 struct xsd_sockmsg msg;
107 char buffer[PAGE_SIZE];
108 } u;
109
110 /* Response queue. */
111 struct mutex reply_mutex;
112 struct list_head read_buffers;
113 wait_queue_head_t read_waitq;
114
115};
116
117/* Read out any raw xenbus messages queued up. */
118static ssize_t xenbus_file_read(struct file *filp,
119 char __user *ubuf,
120 size_t len, loff_t *ppos)
121{
122 struct xenbus_file_priv *u = filp->private_data;
123 struct read_buffer *rb;
124 unsigned i;
125 int ret;
126
127 mutex_lock(&u->reply_mutex);
Daniel De Graaf78081212010-09-08 18:10:42 -0400128again:
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800129 while (list_empty(&u->read_buffers)) {
130 mutex_unlock(&u->reply_mutex);
Paolo Bonzini6280f192010-06-23 18:30:15 +0200131 if (filp->f_flags & O_NONBLOCK)
132 return -EAGAIN;
133
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800134 ret = wait_event_interruptible(u->read_waitq,
135 !list_empty(&u->read_buffers));
136 if (ret)
137 return ret;
138 mutex_lock(&u->reply_mutex);
139 }
140
141 rb = list_entry(u->read_buffers.next, struct read_buffer, list);
142 i = 0;
143 while (i < len) {
144 unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
145
146 ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
147
148 i += sz - ret;
149 rb->cons += sz - ret;
150
Jeremy Fitzhardingefb27cfb2010-08-25 12:19:53 -0700151 if (ret != 0) {
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800152 if (i == 0)
153 i = -EFAULT;
154 goto out;
155 }
156
157 /* Clear out buffer if it has been consumed */
158 if (rb->cons == rb->len) {
159 list_del(&rb->list);
160 kfree(rb);
161 if (list_empty(&u->read_buffers))
162 break;
163 rb = list_entry(u->read_buffers.next,
164 struct read_buffer, list);
165 }
166 }
Daniel De Graaf78081212010-09-08 18:10:42 -0400167 if (i == 0)
168 goto again;
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800169
170out:
171 mutex_unlock(&u->reply_mutex);
172 return i;
173}
174
175/*
176 * Add a buffer to the queue. Caller must hold the appropriate lock
177 * if the queue is not local. (Commonly the caller will build up
178 * multiple queued buffers on a temporary local list, and then add it
179 * to the appropriate list under lock once all the buffers have een
180 * successfully allocated.)
181 */
182static int queue_reply(struct list_head *queue, const void *data, size_t len)
183{
184 struct read_buffer *rb;
185
186 if (len == 0)
187 return 0;
188
189 rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
190 if (rb == NULL)
191 return -ENOMEM;
192
193 rb->cons = 0;
194 rb->len = len;
195
196 memcpy(rb->msg, data, len);
197
198 list_add_tail(&rb->list, queue);
199 return 0;
200}
201
202/*
203 * Free all the read_buffer s on a list.
204 * Caller must have sole reference to list.
205 */
206static void queue_cleanup(struct list_head *list)
207{
208 struct read_buffer *rb;
209
210 while (!list_empty(list)) {
211 rb = list_entry(list->next, struct read_buffer, list);
212 list_del(list->next);
213 kfree(rb);
214 }
215}
216
217struct watch_adapter {
218 struct list_head list;
219 struct xenbus_watch watch;
220 struct xenbus_file_priv *dev_data;
221 char *token;
222};
223
224static void free_watch_adapter(struct watch_adapter *watch)
225{
226 kfree(watch->watch.node);
227 kfree(watch->token);
228 kfree(watch);
229}
230
231static struct watch_adapter *alloc_watch_adapter(const char *path,
232 const char *token)
233{
234 struct watch_adapter *watch;
235
236 watch = kzalloc(sizeof(*watch), GFP_KERNEL);
237 if (watch == NULL)
238 goto out_fail;
239
240 watch->watch.node = kstrdup(path, GFP_KERNEL);
241 if (watch->watch.node == NULL)
242 goto out_free;
243
244 watch->token = kstrdup(token, GFP_KERNEL);
245 if (watch->token == NULL)
246 goto out_free;
247
248 return watch;
249
250out_free:
251 free_watch_adapter(watch);
252
253out_fail:
254 return NULL;
255}
256
257static void watch_fired(struct xenbus_watch *watch,
258 const char **vec,
259 unsigned int len)
260{
261 struct watch_adapter *adap;
262 struct xsd_sockmsg hdr;
263 const char *path, *token;
264 int path_len, tok_len, body_len, data_len = 0;
265 int ret;
266 LIST_HEAD(staging_q);
267
268 adap = container_of(watch, struct watch_adapter, watch);
269
270 path = vec[XS_WATCH_PATH];
271 token = adap->token;
272
273 path_len = strlen(path) + 1;
274 tok_len = strlen(token) + 1;
275 if (len > 2)
276 data_len = vec[len] - vec[2] + 1;
277 body_len = path_len + tok_len + data_len;
278
279 hdr.type = XS_WATCH_EVENT;
280 hdr.len = body_len;
281
282 mutex_lock(&adap->dev_data->reply_mutex);
283
284 ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
285 if (!ret)
286 ret = queue_reply(&staging_q, path, path_len);
287 if (!ret)
288 ret = queue_reply(&staging_q, token, tok_len);
289 if (!ret && len > 2)
290 ret = queue_reply(&staging_q, vec[2], data_len);
291
292 if (!ret) {
293 /* success: pass reply list onto watcher */
294 list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
295 wake_up(&adap->dev_data->read_waitq);
296 } else
297 queue_cleanup(&staging_q);
298
299 mutex_unlock(&adap->dev_data->reply_mutex);
300}
301
302static int xenbus_write_transaction(unsigned msg_type,
303 struct xenbus_file_priv *u)
304{
Ian Campbelle88a0fa2009-01-24 08:22:47 +0000305 int rc;
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800306 void *reply;
307 struct xenbus_transaction_holder *trans = NULL;
308 LIST_HEAD(staging_q);
309
310 if (msg_type == XS_TRANSACTION_START) {
311 trans = kmalloc(sizeof(*trans), GFP_KERNEL);
312 if (!trans) {
313 rc = -ENOMEM;
314 goto out;
315 }
316 }
317
318 reply = xenbus_dev_request_and_reply(&u->u.msg);
319 if (IS_ERR(reply)) {
320 kfree(trans);
321 rc = PTR_ERR(reply);
322 goto out;
323 }
324
325 if (msg_type == XS_TRANSACTION_START) {
326 trans->handle.id = simple_strtoul(reply, NULL, 0);
327
328 list_add(&trans->list, &u->transactions);
329 } else if (msg_type == XS_TRANSACTION_END) {
330 list_for_each_entry(trans, &u->transactions, list)
331 if (trans->handle.id == u->u.msg.tx_id)
332 break;
333 BUG_ON(&trans->list == &u->transactions);
334 list_del(&trans->list);
335
336 kfree(trans);
337 }
338
339 mutex_lock(&u->reply_mutex);
Ian Campbelle88a0fa2009-01-24 08:22:47 +0000340 rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
341 if (!rc)
342 rc = queue_reply(&staging_q, reply, u->u.msg.len);
343 if (!rc) {
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800344 list_splice_tail(&staging_q, &u->read_buffers);
345 wake_up(&u->read_waitq);
346 } else {
347 queue_cleanup(&staging_q);
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800348 }
349 mutex_unlock(&u->reply_mutex);
350
351 kfree(reply);
352
353out:
354 return rc;
355}
356
357static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
358{
359 struct watch_adapter *watch, *tmp_watch;
360 char *path, *token;
361 int err, rc;
362 LIST_HEAD(staging_q);
363
364 path = u->u.buffer + sizeof(u->u.msg);
365 token = memchr(path, 0, u->u.msg.len);
366 if (token == NULL) {
367 rc = -EILSEQ;
368 goto out;
369 }
370 token++;
371
372 if (msg_type == XS_WATCH) {
373 watch = alloc_watch_adapter(path, token);
374 if (watch == NULL) {
375 rc = -ENOMEM;
376 goto out;
377 }
378
379 watch->watch.callback = watch_fired;
380 watch->dev_data = u;
381
382 err = register_xenbus_watch(&watch->watch);
383 if (err) {
384 free_watch_adapter(watch);
385 rc = err;
386 goto out;
387 }
388 list_add(&watch->list, &u->watches);
389 } else {
390 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
391 if (!strcmp(watch->token, token) &&
392 !strcmp(watch->watch.node, path)) {
393 unregister_xenbus_watch(&watch->watch);
394 list_del(&watch->list);
395 free_watch_adapter(watch);
396 break;
397 }
398 }
399 }
400
401 /* Success. Synthesize a reply to say all is OK. */
402 {
403 struct {
404 struct xsd_sockmsg hdr;
405 char body[3];
406 } __packed reply = {
407 {
408 .type = msg_type,
409 .len = sizeof(reply.body)
410 },
411 "OK"
412 };
413
414 mutex_lock(&u->reply_mutex);
415 rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
Daniel De Graaf76ce7612010-09-07 11:42:18 -0400416 wake_up(&u->read_waitq);
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800417 mutex_unlock(&u->reply_mutex);
418 }
419
420out:
421 return rc;
422}
423
424static ssize_t xenbus_file_write(struct file *filp,
425 const char __user *ubuf,
426 size_t len, loff_t *ppos)
427{
428 struct xenbus_file_priv *u = filp->private_data;
429 uint32_t msg_type;
430 int rc = len;
431 int ret;
432 LIST_HEAD(staging_q);
433
434 /*
435 * We're expecting usermode to be writing properly formed
436 * xenbus messages. If they write an incomplete message we
437 * buffer it up. Once it is complete, we act on it.
438 */
439
440 /*
441 * Make sure concurrent writers can't stomp all over each
442 * other's messages and make a mess of our partial message
443 * buffer. We don't make any attemppt to stop multiple
444 * writers from making a mess of each other's incomplete
445 * messages; we're just trying to guarantee our own internal
446 * consistency and make sure that single writes are handled
447 * atomically.
448 */
449 mutex_lock(&u->msgbuffer_mutex);
450
451 /* Get this out of the way early to avoid confusion */
452 if (len == 0)
453 goto out;
454
455 /* Can't write a xenbus message larger we can buffer */
456 if ((len + u->len) > sizeof(u->u.buffer)) {
457 /* On error, dump existing buffer */
458 u->len = 0;
459 rc = -EINVAL;
460 goto out;
461 }
462
463 ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
464
Jeremy Fitzhardingefb27cfb2010-08-25 12:19:53 -0700465 if (ret != 0) {
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800466 rc = -EFAULT;
467 goto out;
468 }
469
470 /* Deal with a partial copy. */
471 len -= ret;
472 rc = len;
473
474 u->len += len;
475
476 /* Return if we haven't got a full message yet */
477 if (u->len < sizeof(u->u.msg))
478 goto out; /* not even the header yet */
479
480 /* If we're expecting a message that's larger than we can
481 possibly send, dump what we have and return an error. */
482 if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
483 rc = -E2BIG;
484 u->len = 0;
485 goto out;
486 }
487
488 if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
489 goto out; /* incomplete data portion */
490
491 /*
492 * OK, now we have a complete message. Do something with it.
493 */
494
495 msg_type = u->u.msg.type;
496
497 switch (msg_type) {
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800498 case XS_WATCH:
499 case XS_UNWATCH:
500 /* (Un)Ask for some path to be watched for changes */
501 ret = xenbus_write_watch(msg_type, u);
502 break;
503
504 default:
Diego Ongaro6d6df2e2010-09-01 09:18:54 -0700505 /* Send out a transaction */
506 ret = xenbus_write_transaction(msg_type, u);
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800507 break;
508 }
509 if (ret != 0)
510 rc = ret;
511
512 /* Buffered message consumed */
513 u->len = 0;
514
515 out:
516 mutex_unlock(&u->msgbuffer_mutex);
517 return rc;
518}
519
520static int xenbus_file_open(struct inode *inode, struct file *filp)
521{
522 struct xenbus_file_priv *u;
523
524 if (xen_store_evtchn == 0)
525 return -ENOENT;
526
527 nonseekable_open(inode, filp);
528
529 u = kzalloc(sizeof(*u), GFP_KERNEL);
530 if (u == NULL)
531 return -ENOMEM;
532
533 INIT_LIST_HEAD(&u->transactions);
534 INIT_LIST_HEAD(&u->watches);
535 INIT_LIST_HEAD(&u->read_buffers);
536 init_waitqueue_head(&u->read_waitq);
537
538 mutex_init(&u->reply_mutex);
539 mutex_init(&u->msgbuffer_mutex);
540
541 filp->private_data = u;
542
543 return 0;
544}
545
546static int xenbus_file_release(struct inode *inode, struct file *filp)
547{
548 struct xenbus_file_priv *u = filp->private_data;
549 struct xenbus_transaction_holder *trans, *tmp;
550 struct watch_adapter *watch, *tmp_watch;
Daniel De Graaf6a5b3be2010-12-20 14:56:09 -0800551 struct read_buffer *rb, *tmp_rb;
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800552
553 /*
554 * No need for locking here because there are no other users,
555 * by definition.
556 */
557
558 list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
559 xenbus_transaction_end(trans->handle, 1);
560 list_del(&trans->list);
561 kfree(trans);
562 }
563
564 list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
565 unregister_xenbus_watch(&watch->watch);
566 list_del(&watch->list);
567 free_watch_adapter(watch);
568 }
569
Daniel De Graaf6a5b3be2010-12-20 14:56:09 -0800570 list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
571 list_del(&rb->list);
572 kfree(rb);
573 }
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800574 kfree(u);
575
576 return 0;
577}
578
579static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
580{
581 struct xenbus_file_priv *u = file->private_data;
582
583 poll_wait(file, &u->read_waitq, wait);
584 if (!list_empty(&u->read_buffers))
585 return POLLIN | POLLRDNORM;
586 return 0;
587}
588
Bastian Blank2fb36832011-12-10 19:29:47 +0100589const struct file_operations xen_xenbus_fops = {
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800590 .read = xenbus_file_read,
591 .write = xenbus_file_write,
592 .open = xenbus_file_open,
593 .release = xenbus_file_release,
594 .poll = xenbus_file_poll,
Arnd Bergmann6038f372010-08-15 18:52:59 +0200595 .llseek = no_llseek,
Alex Zeffertt1107ba82009-01-07 18:07:11 -0800596};
Bastian Blank2fb36832011-12-10 19:29:47 +0100597EXPORT_SYMBOL_GPL(xen_xenbus_fops);
598
599static struct miscdevice xenbus_dev = {
600 .minor = MISC_DYNAMIC_MINOR,
601 .name = "xen/xenbus",
602 .fops = &xen_xenbus_fops,
603};
604
605static int __init xenbus_init(void)
606{
607 int err;
608
609 if (!xen_domain())
610 return -ENODEV;
611
612 err = misc_register(&xenbus_dev);
613 if (err)
614 printk(KERN_ERR "Could not register xenbus device\n");
615 return err;
616}
617
618static void __exit xenbus_exit(void)
619{
620 misc_deregister(&xenbus_dev);
621}
622
623module_init(xenbus_init);
624module_exit(xenbus_exit);