lguest: the host code

This is the code for the "lg.ko" module, which allows lguest guests to
be launched.

[akpm@linux-foundation.org: update for futex-new-private-futexes]
[akpm@linux-foundation.org: build fix]
[jmorris@namei.org: lguest: use hrtimers]
[akpm@linux-foundation.org: x86_64 build fix]
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
new file mode 100644
index 0000000..e90d7a7
--- /dev/null
+++ b/drivers/lguest/lguest_user.c
@@ -0,0 +1,236 @@
+/* Userspace control of the guest, via /dev/lguest. */
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include "lg.h"
+
+static void setup_regs(struct lguest_regs *regs, unsigned long start)
+{
+	/* Write out stack in format lguest expects, so we can switch to it. */
+	regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL;
+	regs->cs = __KERNEL_CS|GUEST_PL;
+	regs->eflags = 0x202; 	/* Interrupts enabled. */
+	regs->eip = start;
+	/* esi points to our boot information (physical address 0) */
+}
+
+/* + addr */
+static long user_get_dma(struct lguest *lg, const u32 __user *input)
+{
+	unsigned long key, udma, irq;
+
+	if (get_user(key, input) != 0)
+		return -EFAULT;
+	udma = get_dma_buffer(lg, key, &irq);
+	if (!udma)
+		return -ENOENT;
+
+	/* We put irq number in udma->used_len. */
+	lgwrite_u32(lg, udma + offsetof(struct lguest_dma, used_len), irq);
+	return udma;
+}
+
+/* To force the Guest to stop running and return to the Launcher, the
+ * Waker sets writes LHREQ_BREAK and the value "1" to /dev/lguest.  The
+ * Launcher then writes LHREQ_BREAK and "0" to release the Waker. */
+static int break_guest_out(struct lguest *lg, const u32 __user *input)
+{
+	unsigned long on;
+
+	/* Fetch whether they're turning break on or off.. */
+	if (get_user(on, input) != 0)
+		return -EFAULT;
+
+	if (on) {
+		lg->break_out = 1;
+		/* Pop it out (may be running on different CPU) */
+		wake_up_process(lg->tsk);
+		/* Wait for them to reset it */
+		return wait_event_interruptible(lg->break_wq, !lg->break_out);
+	} else {
+		lg->break_out = 0;
+		wake_up(&lg->break_wq);
+		return 0;
+	}
+}
+
+/* + irq */
+static int user_send_irq(struct lguest *lg, const u32 __user *input)
+{
+	u32 irq;
+
+	if (get_user(irq, input) != 0)
+		return -EFAULT;
+	if (irq >= LGUEST_IRQS)
+		return -EINVAL;
+	set_bit(irq, lg->irqs_pending);
+	return 0;
+}
+
+static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
+{
+	struct lguest *lg = file->private_data;
+
+	if (!lg)
+		return -EINVAL;
+
+	/* If you're not the task which owns the guest, go away. */
+	if (current != lg->tsk)
+		return -EPERM;
+
+	if (lg->dead) {
+		size_t len;
+
+		if (IS_ERR(lg->dead))
+			return PTR_ERR(lg->dead);
+
+		len = min(size, strlen(lg->dead)+1);
+		if (copy_to_user(user, lg->dead, len) != 0)
+			return -EFAULT;
+		return len;
+	}
+
+	if (lg->dma_is_pending)
+		lg->dma_is_pending = 0;
+
+	return run_guest(lg, (unsigned long __user *)user);
+}
+
+/* Take: pfnlimit, pgdir, start, pageoffset. */
+static int initialize(struct file *file, const u32 __user *input)
+{
+	struct lguest *lg;
+	int err, i;
+	u32 args[4];
+
+	/* We grab the Big Lguest lock, which protects the global array
+	 * "lguests" and multiple simultaneous initializations. */
+	mutex_lock(&lguest_lock);
+
+	if (file->private_data) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	if (copy_from_user(args, input, sizeof(args)) != 0) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	i = find_free_guest();
+	if (i < 0) {
+		err = -ENOSPC;
+		goto unlock;
+	}
+	lg = &lguests[i];
+	lg->guestid = i;
+	lg->pfn_limit = args[0];
+	lg->page_offset = args[3];
+	lg->regs_page = get_zeroed_page(GFP_KERNEL);
+	if (!lg->regs_page) {
+		err = -ENOMEM;
+		goto release_guest;
+	}
+	lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs);
+
+	err = init_guest_pagetable(lg, args[1]);
+	if (err)
+		goto free_regs;
+
+	setup_regs(lg->regs, args[2]);
+	setup_guest_gdt(lg);
+	init_clockdev(lg);
+	lg->tsk = current;
+	lg->mm = get_task_mm(lg->tsk);
+	init_waitqueue_head(&lg->break_wq);
+	lg->last_pages = NULL;
+	file->private_data = lg;
+
+	mutex_unlock(&lguest_lock);
+
+	return sizeof(args);
+
+free_regs:
+	free_page(lg->regs_page);
+release_guest:
+	memset(lg, 0, sizeof(*lg));
+unlock:
+	mutex_unlock(&lguest_lock);
+	return err;
+}
+
+static ssize_t write(struct file *file, const char __user *input,
+		     size_t size, loff_t *off)
+{
+	struct lguest *lg = file->private_data;
+	u32 req;
+
+	if (get_user(req, input) != 0)
+		return -EFAULT;
+	input += sizeof(req);
+
+	if (req != LHREQ_INITIALIZE && !lg)
+		return -EINVAL;
+	if (lg && lg->dead)
+		return -ENOENT;
+
+	/* If you're not the task which owns the Guest, you can only break */
+	if (lg && current != lg->tsk && req != LHREQ_BREAK)
+		return -EPERM;
+
+	switch (req) {
+	case LHREQ_INITIALIZE:
+		return initialize(file, (const u32 __user *)input);
+	case LHREQ_GETDMA:
+		return user_get_dma(lg, (const u32 __user *)input);
+	case LHREQ_IRQ:
+		return user_send_irq(lg, (const u32 __user *)input);
+	case LHREQ_BREAK:
+		return break_guest_out(lg, (const u32 __user *)input);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int close(struct inode *inode, struct file *file)
+{
+	struct lguest *lg = file->private_data;
+
+	if (!lg)
+		return 0;
+
+	mutex_lock(&lguest_lock);
+	/* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */
+	hrtimer_cancel(&lg->hrt);
+	release_all_dma(lg);
+	free_guest_pagetable(lg);
+	mmput(lg->mm);
+	if (!IS_ERR(lg->dead))
+		kfree(lg->dead);
+	free_page(lg->regs_page);
+	memset(lg, 0, sizeof(*lg));
+	mutex_unlock(&lguest_lock);
+	return 0;
+}
+
+static struct file_operations lguest_fops = {
+	.owner	 = THIS_MODULE,
+	.release = close,
+	.write	 = write,
+	.read	 = read,
+};
+static struct miscdevice lguest_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "lguest",
+	.fops	= &lguest_fops,
+};
+
+int __init lguest_device_init(void)
+{
+	return misc_register(&lguest_dev);
+}
+
+void __exit lguest_device_remove(void)
+{
+	misc_deregister(&lguest_dev);
+}