KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls

For pseries machine emulation, in order to move the interrupt
controller code to the kernel, we need to intercept some RTAS
calls in the kernel itself.  This adds an infrastructure to allow
in-kernel handlers to be registered for RTAS services by name.
A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
associate token values with those service names.  Then, when the
guest requests an RTAS service with one of those token values, it
will be handled by the relevant in-kernel handler rather than being
passed up to userspace as at present.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix warning]
Signed-off-by: Alexander Graf <agraf@suse.de>
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3d..cf4df8e 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 13740a6..311f7e6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -259,6 +259,7 @@
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
 #endif
 #ifdef CONFIG_KVM_MPIC
 	struct openpic *mpic;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index df9c80b..8a30eb7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -166,6 +166,10 @@
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 03c7819..eb9e25c 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -324,6 +324,12 @@
 	__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4eada0c..3faf5c0 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -86,6 +86,7 @@
 	emulate.o \
 	book3s.o \
 	book3s_64_vio.o \
+	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5af0f29..f3d7af7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -483,7 +483,7 @@
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
-	int idx;
+	int idx, rc;
 
 	switch (req) {
 	case H_ENTER:
@@ -519,6 +519,19 @@
 					kvmppc_get_gpr(vcpu, 5),
 					kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		rc = kvmppc_rtas_hcall(vcpu);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
 	default:
 		return RESUME_HOST;
 	}
@@ -1829,6 +1842,7 @@
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 
 	kvm->arch.rma = NULL;
 
@@ -1874,6 +1888,8 @@
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_rtas_tokens_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c1cffa882..d09baf1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1296,6 +1296,7 @@
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30..4efa4a4 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -246,6 +246,13 @@
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
 	}
 
 	return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 0000000..6ad7050
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+
+struct rtas_handler {
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = { };
+
+struct rtas_token_definition {
+	struct list_head list;
+	struct rtas_handler *handler;
+	u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+	struct kvm_rtas_token_args args;
+	return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		if (rtas_name_matches(d->handler->name, name)) {
+			list_del(&d->list);
+			kfree(d);
+			return 0;
+		}
+	}
+
+	/* It's not an error to undefine an undefined token */
+	return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *d;
+	struct rtas_handler *h = NULL;
+	bool found;
+	int i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+		if (d->token == token)
+			return -EEXIST;
+	}
+
+	found = false;
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		h = &rtas_handlers[i];
+		if (rtas_name_matches(h->name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->handler = h;
+	d->token = token;
+
+	list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args args;
+	int rc;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->lock);
+
+	if (args.token)
+		rc = rtas_token_define(kvm, args.name, args.token);
+	else
+		rc = rtas_token_undefine(kvm, args.name);
+
+	mutex_unlock(&kvm->lock);
+
+	return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/* r4 contains the guest physical address of the RTAS args */
+	args_phys = kvmppc_get_gpr(vcpu, 4);
+
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	args.rets = &args.args[args.nargs];
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == args.token) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	if (rc == 0) {
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer. That means we can't get to the args, and so we
+	 * can't fail the RTAS call. So fail right out to userspace,
+	 * which should kill the guest.
+	 */
+	return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d8e81e6..d4fd443 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -341,6 +341,7 @@
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
+	case KVM_CAP_PPC_RTAS:
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -986,6 +987,7 @@
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
 		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
 		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -1030,6 +1032,12 @@
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN: {
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;