Blame - arch/powerpc/kvm/book3s_hv.c - kernel/msm-4.9

blob: 36b6d98f11970b42cf96678ba6abcc14ba84052e [file] [log] [blame]

Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	1	/*
				2	* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
				3	* Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
				4	*
				5	* Authors:
				6	* Paul Mackerras <paulus@au1.ibm.com>
				7	* Alexander Graf <agraf@suse.de>
				8	* Kevin Wolf <mail@kevin-wolf.de>
				9	*
				10	* Description: KVM functions specific to running on Book 3S
				11	* processors in hypervisor mode (specifically POWER7 and later).
				12	*
				13	* This file is derived from arch/powerpc/kvm/book3s.c,
				14	* by Alexander Graf <agraf@suse.de>.
				15	*
				16	* This program is free software; you can redistribute it and/or modify
				17	* it under the terms of the GNU General Public License, version 2, as
				18	* published by the Free Software Foundation.
				19	*/
				20
				21	#include <linux/kvm_host.h>
				22	#include <linux/err.h>
				23	#include <linux/slab.h>
				24	#include <linux/preempt.h>
				25	#include <linux/sched.h>
				26	#include <linux/delay.h>
				27	#include <linux/fs.h>
				28	#include <linux/anon_inodes.h>
				29	#include <linux/cpumask.h>
				30
				31	#include <asm/reg.h>
				32	#include <asm/cputable.h>
				33	#include <asm/cacheflush.h>
				34	#include <asm/tlbflush.h>
				35	#include <asm/uaccess.h>
				36	#include <asm/io.h>
				37	#include <asm/kvm_ppc.h>
				38	#include <asm/kvm_book3s.h>
				39	#include <asm/mmu_context.h>
				40	#include <asm/lppaca.h>
				41	#include <asm/processor.h>
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	42	#include <asm/cputhreads.h>
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	43	#include <linux/gfp.h>
				44	#include <linux/sched.h>
				45	#include <linux/vmalloc.h>
				46	#include <linux/highmem.h>
				47
				48	/* #define EXIT_DEBUG */
				49	/* #define EXIT_DEBUG_SIMPLE */
				50	/* #define EXIT_DEBUG_INT */
				51
				52	void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
				53	{
				54	local_paca->kvm_hstate.kvm_vcpu = vcpu;
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	55	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	56	}
				57
				58	void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
				59	{
				60	}
				61
/* Vcore blocked-thread accounting helpers, defined later in this file. */
static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
				64
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	65	void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
				66	{
				67	u64 now;
				68	unsigned long dec_nsec;
				69
				70	now = get_tb();
				71	if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
				72	kvmppc_core_queue_dec(vcpu);
				73	if (vcpu->arch.pending_exceptions)
				74	return;
				75	if (vcpu->arch.dec_expires != ~(u64)0) {
				76	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
				77	tb_ticks_per_sec;
				78	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
				79	HRTIMER_MODE_REL);
				80	}
				81
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	82	kvmppc_vcpu_blocked(vcpu);
				83
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	84	kvm_vcpu_block(vcpu);
				85	vcpu->stat.halt_wakeup++;
				86
				87	if (vcpu->arch.dec_expires != ~(u64)0)
				88	hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	89
				90	kvmppc_vcpu_unblocked(vcpu);
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	91	}
				92
				93	void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
				94	{
				95	vcpu->arch.shregs.msr = msr;
				96	}
				97
				98	void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
				99	{
				100	vcpu->arch.pvr = pvr;
				101	}
				102
				103	void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
				104	{
				105	int r;
				106
				107	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
				108	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
				109	vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
				110	for (r = 0; r < 16; ++r)
				111	pr_err("r%2d = %.16lx r%d = %.16lx\n",
				112	r, kvmppc_get_gpr(vcpu, r),
				113	r+16, kvmppc_get_gpr(vcpu, r+16));
				114	pr_err("ctr = %.16lx lr = %.16lx\n",
				115	vcpu->arch.ctr, vcpu->arch.lr);
				116	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
				117	vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
				118	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
				119	vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
				120	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
				121	vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
				122	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
				123	vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
				124	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
				125	pr_err("fault dar = %.16lx dsisr = %.8x\n",
				126	vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
				127	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
				128	for (r = 0; r < vcpu->arch.slb_max; ++r)
				129	pr_err(" ESID = %.16llx VSID = %.16llx\n",
				130	vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
				131	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
				132	vcpu->arch.lpcr, vcpu->kvm->arch.sdr1,
				133	vcpu->arch.last_inst);
				134	}
				135
Paul Mackerras	a8606e2	2011-06-29 00:22:05 +0000	[diff] [blame]	136	struct kvm_vcpu kvmppc_find_vcpu(struct kvm kvm, int id)
				137	{
				138	int r;
				139	struct kvm_vcpu v, ret = NULL;
				140
				141	mutex_lock(&kvm->lock);
				142	kvm_for_each_vcpu(r, v, kvm) {
				143	if (v->vcpu_id == id) {
				144	ret = v;
				145	break;
				146	}
				147	}
				148	mutex_unlock(&kvm->lock);
				149	return ret;
				150	}
				151
				152	static void init_vpa(struct kvm_vcpu vcpu, struct lppaca vpa)
				153	{
				154	vpa->shared_proc = 1;
				155	vpa->yield_count = 1;
				156	}
				157
				158	static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				159	unsigned long flags,
				160	unsigned long vcpuid, unsigned long vpa)
				161	{
				162	struct kvm *kvm = vcpu->kvm;
				163	unsigned long pg_index, ra, len;
				164	unsigned long pg_offset;
				165	void *va;
				166	struct kvm_vcpu *tvcpu;
				167
				168	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
				169	if (!tvcpu)
				170	return H_PARAMETER;
				171
				172	flags >>= 63 - 18;
				173	flags &= 7;
				174	if (flags == 0 \|\| flags == 4)
				175	return H_PARAMETER;
				176	if (flags < 4) {
				177	if (vpa & 0x7f)
				178	return H_PARAMETER;
				179	/* registering new area; convert logical addr to real */
				180	pg_index = vpa >> kvm->arch.ram_porder;
				181	pg_offset = vpa & (kvm->arch.ram_psize - 1);
				182	if (pg_index >= kvm->arch.ram_npages)
				183	return H_PARAMETER;
				184	if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
				185	return H_PARAMETER;
				186	ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
				187	ra \|= pg_offset;
				188	va = __va(ra);
				189	if (flags <= 1)
				190	len = (unsigned short )(va + 4);
				191	else
				192	len = (unsigned int )(va + 4);
				193	if (pg_offset + len > kvm->arch.ram_psize)
				194	return H_PARAMETER;
				195	switch (flags) {
				196	case 1: /* register VPA */
				197	if (len < 640)
				198	return H_PARAMETER;
				199	tvcpu->arch.vpa = va;
				200	init_vpa(vcpu, va);
				201	break;
				202	case 2: /* register DTL */
				203	if (len < 48)
				204	return H_PARAMETER;
				205	if (!tvcpu->arch.vpa)
				206	return H_RESOURCE;
				207	len -= len % 48;
				208	tvcpu->arch.dtl = va;
				209	tvcpu->arch.dtl_end = va + len;
				210	break;
				211	case 3: /* register SLB shadow buffer */
				212	if (len < 8)
				213	return H_PARAMETER;
				214	if (!tvcpu->arch.vpa)
				215	return H_RESOURCE;
				216	tvcpu->arch.slb_shadow = va;
				217	len = (len - 16) / 16;
				218	tvcpu->arch.slb_shadow = va;
				219	break;
				220	}
				221	} else {
				222	switch (flags) {
				223	case 5: /* unregister VPA */
				224	if (tvcpu->arch.slb_shadow \|\| tvcpu->arch.dtl)
				225	return H_RESOURCE;
				226	tvcpu->arch.vpa = NULL;
				227	break;
				228	case 6: /* unregister DTL */
				229	tvcpu->arch.dtl = NULL;
				230	break;
				231	case 7: /* unregister SLB shadow buffer */
				232	tvcpu->arch.slb_shadow = NULL;
				233	break;
				234	}
				235	}
				236	return H_SUCCESS;
				237	}
				238
				239	int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
				240	{
				241	unsigned long req = kvmppc_get_gpr(vcpu, 3);
				242	unsigned long target, ret = H_SUCCESS;
				243	struct kvm_vcpu *tvcpu;
				244
				245	switch (req) {
				246	case H_CEDE:
				247	vcpu->arch.shregs.msr \|= MSR_EE;
				248	vcpu->arch.ceded = 1;
				249	smp_mb();
				250	if (!vcpu->arch.prodded)
				251	kvmppc_vcpu_block(vcpu);
				252	else
				253	vcpu->arch.prodded = 0;
				254	smp_mb();
				255	vcpu->arch.ceded = 0;
				256	break;
				257	case H_PROD:
				258	target = kvmppc_get_gpr(vcpu, 4);
				259	tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
				260	if (!tvcpu) {
				261	ret = H_PARAMETER;
				262	break;
				263	}
				264	tvcpu->arch.prodded = 1;
				265	smp_mb();
				266	if (vcpu->arch.ceded) {
				267	if (waitqueue_active(&vcpu->wq)) {
				268	wake_up_interruptible(&vcpu->wq);
				269	vcpu->stat.halt_wakeup++;
				270	}
				271	}
				272	break;
				273	case H_CONFER:
				274	break;
				275	case H_REGISTER_VPA:
				276	ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
				277	kvmppc_get_gpr(vcpu, 5),
				278	kvmppc_get_gpr(vcpu, 6));
				279	break;
				280	default:
				281	return RESUME_HOST;
				282	}
				283	kvmppc_set_gpr(vcpu, 3, ret);
				284	vcpu->arch.hcall_needed = 0;
				285	return RESUME_GUEST;
				286	}
				287
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	288	static int kvmppc_handle_exit(struct kvm_run run, struct kvm_vcpu vcpu,
				289	struct task_struct *tsk)
				290	{
				291	int r = RESUME_HOST;
				292
				293	vcpu->stat.sum_exits++;
				294
				295	run->exit_reason = KVM_EXIT_UNKNOWN;
				296	run->ready_for_interrupt_injection = 1;
				297	switch (vcpu->arch.trap) {
				298	/* We're good on these - the host merely wanted to get our attention */
				299	case BOOK3S_INTERRUPT_HV_DECREMENTER:
				300	vcpu->stat.dec_exits++;
				301	r = RESUME_GUEST;
				302	break;
				303	case BOOK3S_INTERRUPT_EXTERNAL:
				304	vcpu->stat.ext_intr_exits++;
				305	r = RESUME_GUEST;
				306	break;
				307	case BOOK3S_INTERRUPT_PERFMON:
				308	r = RESUME_GUEST;
				309	break;
				310	case BOOK3S_INTERRUPT_PROGRAM:
				311	{
				312	ulong flags;
				313	/*
				314	* Normally program interrupts are delivered directly
				315	* to the guest by the hardware, but we can get here
				316	* as a result of a hypervisor emulation interrupt
				317	* (e40) getting turned into a 700 by BML RTAS.
				318	*/
				319	flags = vcpu->arch.shregs.msr & 0x1f0000ull;
				320	kvmppc_core_queue_program(vcpu, flags);
				321	r = RESUME_GUEST;
				322	break;
				323	}
				324	case BOOK3S_INTERRUPT_SYSCALL:
				325	{
				326	/* hcall - punt to userspace */
				327	int i;
				328
				329	if (vcpu->arch.shregs.msr & MSR_PR) {
				330	/* sc 1 from userspace - reflect to guest syscall */
				331	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
				332	r = RESUME_GUEST;
				333	break;
				334	}
				335	run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
				336	for (i = 0; i < 9; ++i)
				337	run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
				338	run->exit_reason = KVM_EXIT_PAPR_HCALL;
				339	vcpu->arch.hcall_needed = 1;
				340	r = RESUME_HOST;
				341	break;
				342	}
				343	/*
				344	* We get these next two if the guest does a bad real-mode access,
				345	* as we have enabled VRMA (virtualized real mode area) mode in the
				346	* LPCR. We just generate an appropriate DSI/ISI to the guest.
				347	*/
				348	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
				349	vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
				350	vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
				351	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
				352	r = RESUME_GUEST;
				353	break;
				354	case BOOK3S_INTERRUPT_H_INST_STORAGE:
				355	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
				356	0x08000000);
				357	r = RESUME_GUEST;
				358	break;
				359	/*
				360	* This occurs if the guest executes an illegal instruction.
				361	* We just generate a program interrupt to the guest, since
				362	* we don't emulate any guest instructions at this stage.
				363	*/
				364	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
				365	kvmppc_core_queue_program(vcpu, 0x80000);
				366	r = RESUME_GUEST;
				367	break;
				368	default:
				369	kvmppc_dump_regs(vcpu);
				370	printk(KERN_EMERG "trap=0x%x \| pc=0x%lx \| msr=0x%llx\n",
				371	vcpu->arch.trap, kvmppc_get_pc(vcpu),
				372	vcpu->arch.shregs.msr);
				373	r = RESUME_HOST;
				374	BUG();
				375	break;
				376	}
				377
				378
				379	if (!(r & RESUME_HOST)) {
				380	/* To avoid clobbering exit_reason, only check for signals if
				381	* we aren't already exiting to userspace for some other
				382	* reason. */
				383	if (signal_pending(tsk)) {
				384	vcpu->stat.signal_exits++;
				385	run->exit_reason = KVM_EXIT_INTR;
				386	r = -EINTR;
				387	} else {
				388	kvmppc_core_deliver_interrupts(vcpu);
				389	}
				390	}
				391
				392	return r;
				393	}
				394
				395	int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				396	struct kvm_sregs *sregs)
				397	{
				398	int i;
				399
				400	sregs->pvr = vcpu->arch.pvr;
				401
				402	memset(sregs, 0, sizeof(struct kvm_sregs));
				403	for (i = 0; i < vcpu->arch.slb_max; i++) {
				404	sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
				405	sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
				406	}
				407
				408	return 0;
				409	}
				410
				411	int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				412	struct kvm_sregs *sregs)
				413	{
				414	int i, j;
				415
				416	kvmppc_set_pvr(vcpu, sregs->pvr);
				417
				418	j = 0;
				419	for (i = 0; i < vcpu->arch.slb_nr; i++) {
				420	if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
				421	vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
				422	vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
				423	++j;
				424	}
				425	}
				426	vcpu->arch.slb_max = j;
				427
				428	return 0;
				429	}
				430
				431	int kvmppc_core_check_processor_compat(void)
				432	{
				433	if (cpu_has_feature(CPU_FTR_HVMODE_206))
				434	return 0;
				435	return -EIO;
				436	}
				437
				438	struct kvm_vcpu kvmppc_core_vcpu_create(struct kvm kvm, unsigned int id)
				439	{
				440	struct kvm_vcpu *vcpu;
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	441	int err = -EINVAL;
				442	int core;
				443	struct kvmppc_vcore *vcore;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	444	unsigned long lpcr;
				445
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	446	core = id / threads_per_core;
				447	if (core >= KVM_MAX_VCORES)
				448	goto out;
				449
				450	err = -ENOMEM;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	451	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
				452	if (!vcpu)
				453	goto out;
				454
				455	err = kvm_vcpu_init(vcpu, kvm, id);
				456	if (err)
				457	goto free_vcpu;
				458
				459	vcpu->arch.shared = &vcpu->arch.shregs;
				460	vcpu->arch.last_cpu = -1;
				461	vcpu->arch.mmcr[0] = MMCR0_FC;
				462	vcpu->arch.ctrl = CTRL_RUNLATCH;
				463	/* default to host PVR, since we can't spoof it */
				464	vcpu->arch.pvr = mfspr(SPRN_PVR);
				465	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
				466
				467	lpcr = kvm->arch.host_lpcr & (LPCR_PECE \| LPCR_LPES);
				468	lpcr \|= LPCR_VPM0 \| LPCR_VRMA_L \| (4UL << LPCR_DPFD_SH) \| LPCR_HDICE;
				469	vcpu->arch.lpcr = lpcr;
				470
				471	kvmppc_mmu_book3s_hv_init(vcpu);
				472
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	473	/*
				474	* Some vcpus may start out in stopped state. If we initialize
				475	* them to busy-in-host state they will stop other vcpus in the
				476	* vcore from running. Instead we initialize them to blocked
				477	* state, effectively considering them to be stopped until we
				478	* see the first run ioctl for them.
				479	*/
				480	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
				481
				482	init_waitqueue_head(&vcpu->arch.cpu_run);
				483
				484	mutex_lock(&kvm->lock);
				485	vcore = kvm->arch.vcores[core];
				486	if (!vcore) {
				487	vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
				488	if (vcore) {
				489	INIT_LIST_HEAD(&vcore->runnable_threads);
				490	spin_lock_init(&vcore->lock);
				491	}
				492	kvm->arch.vcores[core] = vcore;
				493	}
				494	mutex_unlock(&kvm->lock);
				495
				496	if (!vcore)
				497	goto free_vcpu;
				498
				499	spin_lock(&vcore->lock);
				500	++vcore->num_threads;
				501	++vcore->n_blocked;
				502	spin_unlock(&vcore->lock);
				503	vcpu->arch.vcore = vcore;
				504
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	505	return vcpu;
				506
				507	free_vcpu:
				508	kfree(vcpu);
				509	out:
				510	return ERR_PTR(err);
				511	}
				512
				513	void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
				514	{
				515	kvm_vcpu_uninit(vcpu);
				516	kfree(vcpu);
				517	}
				518
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	519	static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	520	{
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	521	struct kvmppc_vcore *vc = vcpu->arch.vcore;
				522
				523	spin_lock(&vc->lock);
				524	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
				525	++vc->n_blocked;
				526	if (vc->n_runnable > 0 &&
				527	vc->n_runnable + vc->n_blocked == vc->num_threads) {
				528	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
				529	arch.run_list);
				530	wake_up(&vcpu->arch.cpu_run);
				531	}
				532	spin_unlock(&vc->lock);
				533	}
				534
				535	static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
				536	{
				537	struct kvmppc_vcore *vc = vcpu->arch.vcore;
				538
				539	spin_lock(&vc->lock);
				540	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
				541	--vc->n_blocked;
				542	spin_unlock(&vc->lock);
				543	}
				544
				545	extern int __kvmppc_vcore_entry(struct kvm_run kvm_run, struct kvm_vcpu vcpu);
				546	extern void xics_wake_cpu(int cpu);
				547
				548	static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				549	struct kvm_vcpu *vcpu)
				550	{
				551	struct kvm_vcpu *v;
				552
				553	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
				554	return;
				555	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
				556	--vc->n_runnable;
				557	/* decrement the physical thread id of each following vcpu */
				558	v = vcpu;
				559	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
				560	--v->arch.ptid;
				561	list_del(&vcpu->arch.run_list);
				562	}
				563
				564	static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
				565	{
				566	int cpu;
				567	struct paca_struct *tpaca;
				568	struct kvmppc_vcore *vc = vcpu->arch.vcore;
				569
				570	cpu = vc->pcpu + vcpu->arch.ptid;
				571	tpaca = &paca[cpu];
				572	tpaca->kvm_hstate.kvm_vcpu = vcpu;
				573	tpaca->kvm_hstate.kvm_vcore = vc;
				574	smp_wmb();
				575	#ifdef CONFIG_PPC_ICP_NATIVE
				576	if (vcpu->arch.ptid) {
				577	tpaca->cpu_start = 0x80;
				578	tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
				579	wmb();
				580	xics_wake_cpu(cpu);
				581	++vc->n_woken;
				582	}
				583	#endif
				584	}
				585
				586	static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
				587	{
				588	int i;
				589
				590	HMT_low();
				591	i = 0;
				592	while (vc->nap_count < vc->n_woken) {
				593	if (++i >= 1000000) {
				594	pr_err("kvmppc_wait_for_nap timeout %d %d\n",
				595	vc->nap_count, vc->n_woken);
				596	break;
				597	}
				598	cpu_relax();
				599	}
				600	HMT_medium();
				601	}
				602
				603	/*
				604	* Check that we are on thread 0 and that any other threads in
				605	* this core are off-line.
				606	*/
				607	static int on_primary_thread(void)
				608	{
				609	int cpu = smp_processor_id();
				610	int thr = cpu_thread_in_core(cpu);
				611
				612	if (thr)
				613	return 0;
				614	while (++thr < threads_per_core)
				615	if (cpu_online(cpu + thr))
				616	return 0;
				617	return 1;
				618	}
				619
				620	/*
				621	* Run a set of guest threads on a physical core.
				622	* Called with vc->lock held.
				623	*/
				624	static int kvmppc_run_core(struct kvmppc_vcore *vc)
				625	{
				626	struct kvm_vcpu vcpu, vnext;
				627	long ret;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	628	u64 now;
				629
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	630	/* don't start if any threads have a signal pending */
				631	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
				632	if (signal_pending(vcpu->arch.run_task))
				633	return 0;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	634
				635	/*
				636	* Make sure we are running on thread 0, and that
				637	* secondary threads are offline.
				638	* XXX we should also block attempts to bring any
				639	* secondary threads online.
				640	*/
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	641	if (threads_per_core > 1 && !on_primary_thread()) {
				642	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
				643	vcpu->arch.ret = -EBUSY;
				644	goto out;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	645	}
				646
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	647	vc->n_woken = 0;
				648	vc->nap_count = 0;
				649	vc->entry_exit_count = 0;
				650	vc->vcore_running = 1;
				651	vc->in_guest = 0;
				652	vc->pcpu = smp_processor_id();
				653	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
				654	kvmppc_start_thread(vcpu);
				655	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
				656	arch.run_list);
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	657
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	658	spin_unlock(&vc->lock);
				659
				660	preempt_disable();
				661	kvm_guest_enter();
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	662	__kvmppc_vcore_entry(NULL, vcpu);
				663
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	664	/* wait for secondary threads to finish writing their state to memory */
				665	spin_lock(&vc->lock);
				666	if (vc->nap_count < vc->n_woken)
				667	kvmppc_wait_for_nap(vc);
				668	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
				669	vc->vcore_running = 2;
				670	spin_unlock(&vc->lock);
				671
				672	/* make sure updates to secondary vcpu structs are visible now */
				673	smp_mb();
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	674	kvm_guest_exit();
				675
				676	preempt_enable();
				677	kvm_resched(vcpu);
				678
				679	now = get_tb();
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	680	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
				681	/* cancel pending dec exception if dec is positive */
				682	if (now < vcpu->arch.dec_expires &&
				683	kvmppc_core_pending_dec(vcpu))
				684	kvmppc_core_dequeue_dec(vcpu);
				685	if (!vcpu->arch.trap) {
				686	if (signal_pending(vcpu->arch.run_task)) {
				687	vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				688	vcpu->arch.ret = -EINTR;
				689	}
				690	continue; /* didn't get to run */
				691	}
				692	ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
				693	vcpu->arch.run_task);
				694	vcpu->arch.ret = ret;
				695	vcpu->arch.trap = 0;
				696	}
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	697
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	698	spin_lock(&vc->lock);
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	699	out:
Paul Mackerras	371fefd	2011-06-29 00:23:08 +0000	[diff] [blame^]	700	vc->vcore_running = 0;
				701	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				702	arch.run_list) {
				703	if (vcpu->arch.ret != RESUME_GUEST) {
				704	kvmppc_remove_runnable(vc, vcpu);
				705	wake_up(&vcpu->arch.cpu_run);
				706	}
				707	}
				708
				709	return 1;
				710	}
				711
				712	static int kvmppc_run_vcpu(struct kvm_run kvm_run, struct kvm_vcpu vcpu)
				713	{
				714	int ptid;
				715	int wait_state;
				716	struct kvmppc_vcore *vc;
				717	DEFINE_WAIT(wait);
				718
				719	/* No need to go into the guest when all we do is going out */
				720	if (signal_pending(current)) {
				721	kvm_run->exit_reason = KVM_EXIT_INTR;
				722	return -EINTR;
				723	}
				724
				725	kvm_run->exit_reason = 0;
				726	vcpu->arch.ret = RESUME_GUEST;
				727	vcpu->arch.trap = 0;
				728
				729	flush_fp_to_thread(current);
				730	flush_altivec_to_thread(current);
				731	flush_vsx_to_thread(current);
				732
				733	/*
				734	* Synchronize with other threads in this virtual core
				735	*/
				736	vc = vcpu->arch.vcore;
				737	spin_lock(&vc->lock);
				738	/* This happens the first time this is called for a vcpu */
				739	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
				740	--vc->n_blocked;
				741	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
				742	ptid = vc->n_runnable;
				743	vcpu->arch.run_task = current;
				744	vcpu->arch.kvm_run = kvm_run;
				745	vcpu->arch.ptid = ptid;
				746	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
				747	++vc->n_runnable;
				748
				749	wait_state = TASK_INTERRUPTIBLE;
				750	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
				751	if (signal_pending(current)) {
				752	if (!vc->vcore_running) {
				753	kvm_run->exit_reason = KVM_EXIT_INTR;
				754	vcpu->arch.ret = -EINTR;
				755	break;
				756	}
				757	/* have to wait for vcore to stop executing guest */
				758	wait_state = TASK_UNINTERRUPTIBLE;
				759	smp_send_reschedule(vc->pcpu);
				760	}
				761
				762	if (!vc->vcore_running &&
				763	vc->n_runnable + vc->n_blocked == vc->num_threads) {
				764	/* we can run now */
				765	if (kvmppc_run_core(vc))
				766	continue;
				767	}
				768
				769	if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
				770	kvmppc_start_thread(vcpu);
				771
				772	/* wait for other threads to come in, or wait for vcore */
				773	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
				774	spin_unlock(&vc->lock);
				775	schedule();
				776	finish_wait(&vcpu->arch.cpu_run, &wait);
				777	spin_lock(&vc->lock);
				778	}
				779
				780	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
				781	kvmppc_remove_runnable(vc, vcpu);
				782	spin_unlock(&vc->lock);
				783
				784	return vcpu->arch.ret;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	785	}
				786
Paul Mackerras	a8606e2	2011-06-29 00:22:05 +0000	[diff] [blame]	787	int kvmppc_vcpu_run(struct kvm_run run, struct kvm_vcpu vcpu)
				788	{
				789	int r;
				790
				791	do {
				792	r = kvmppc_run_vcpu(run, vcpu);
				793
				794	if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
				795	!(vcpu->arch.shregs.msr & MSR_PR)) {
				796	r = kvmppc_pseries_do_hcall(vcpu);
				797	kvmppc_core_deliver_interrupts(vcpu);
				798	}
				799	} while (r == RESUME_GUEST);
				800	return r;
				801	}
				802
David Gibson	54738c0	2011-06-29 00:22:41 +0000	[diff] [blame]	803	static long kvmppc_stt_npages(unsigned long window_size)
				804	{
				805	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
				806	* sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
				807	}
				808
				809	static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
				810	{
				811	struct kvm *kvm = stt->kvm;
				812	int i;
				813
				814	mutex_lock(&kvm->lock);
				815	list_del(&stt->list);
				816	for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
				817	__free_page(stt->pages[i]);
				818	kfree(stt);
				819	mutex_unlock(&kvm->lock);
				820
				821	kvm_put_kvm(kvm);
				822	}
				823
				824	static int kvm_spapr_tce_fault(struct vm_area_struct vma, struct vm_fault vmf)
				825	{
				826	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
				827	struct page *page;
				828
				829	if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
				830	return VM_FAULT_SIGBUS;
				831
				832	page = stt->pages[vmf->pgoff];
				833	get_page(page);
				834	vmf->page = page;
				835	return 0;
				836	}
				837
				838	static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
				839	.fault = kvm_spapr_tce_fault,
				840	};
				841
				842	static int kvm_spapr_tce_mmap(struct file file, struct vm_area_struct vma)
				843	{
				844	vma->vm_ops = &kvm_spapr_tce_vm_ops;
				845	return 0;
				846	}
				847
				848	static int kvm_spapr_tce_release(struct inode inode, struct file filp)
				849	{
				850	struct kvmppc_spapr_tce_table *stt = filp->private_data;
				851
				852	release_spapr_tce_table(stt);
				853	return 0;
				854	}
				855
				856	static struct file_operations kvm_spapr_tce_fops = {
				857	.mmap = kvm_spapr_tce_mmap,
				858	.release = kvm_spapr_tce_release,
				859	};
				860
				861	long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				862	struct kvm_create_spapr_tce *args)
				863	{
				864	struct kvmppc_spapr_tce_table *stt = NULL;
				865	long npages;
				866	int ret = -ENOMEM;
				867	int i;
				868
				869	/* Check this LIOBN hasn't been previously allocated */
				870	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
				871	if (stt->liobn == args->liobn)
				872	return -EBUSY;
				873	}
				874
				875	npages = kvmppc_stt_npages(args->window_size);
				876
				877	stt = kzalloc(sizeof(stt) + npages sizeof(struct page *),
				878	GFP_KERNEL);
				879	if (!stt)
				880	goto fail;
				881
				882	stt->liobn = args->liobn;
				883	stt->window_size = args->window_size;
				884	stt->kvm = kvm;
				885
				886	for (i = 0; i < npages; i++) {
				887	stt->pages[i] = alloc_page(GFP_KERNEL \| __GFP_ZERO);
				888	if (!stt->pages[i])
				889	goto fail;
				890	}
				891
				892	kvm_get_kvm(kvm);
				893
				894	mutex_lock(&kvm->lock);
				895	list_add(&stt->list, &kvm->arch.spapr_tce_tables);
				896
				897	mutex_unlock(&kvm->lock);
				898
				899	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				900	stt, O_RDWR);
				901
				902	fail:
				903	if (stt) {
				904	for (i = 0; i < npages; i++)
				905	if (stt->pages[i])
				906	__free_page(stt->pages[i]);
				907
				908	kfree(stt);
				909	}
				910	return ret;
				911	}
				912
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	913	int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				914	struct kvm_userspace_memory_region *mem)
				915	{
				916	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
				917	return kvmppc_prepare_vrma(kvm, mem);
				918	return 0;
				919	}
				920
				921	void kvmppc_core_commit_memory_region(struct kvm *kvm,
				922	struct kvm_userspace_memory_region *mem)
				923	{
				924	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
				925	kvmppc_map_vrma(kvm, mem);
				926	}
				927
				928	int kvmppc_core_init_vm(struct kvm *kvm)
				929	{
				930	long r;
				931
				932	/* Allocate hashed page table */
				933	r = kvmppc_alloc_hpt(kvm);
David Gibson	54738c0	2011-06-29 00:22:41 +0000	[diff] [blame]	934	if (r)
				935	return r;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	936
David Gibson	54738c0	2011-06-29 00:22:41 +0000	[diff] [blame]	937	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
				938	return 0;
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	939	}
				940
				941	void kvmppc_core_destroy_vm(struct kvm *kvm)
				942	{
				943	kvmppc_free_hpt(kvm);
David Gibson	54738c0	2011-06-29 00:22:41 +0000	[diff] [blame]	944	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
Paul Mackerras	de56a94	2011-06-29 00:21:34 +0000	[diff] [blame]	945	}
				946
				947	/* These are stubs for now */
				948	void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
				949	{
				950	}
				951
				952	/* We don't need to emulate any privileged instructions or dcbz */
				953	int kvmppc_core_emulate_op(struct kvm_run run, struct kvm_vcpu vcpu,
				954	unsigned int inst, int *advance)
				955	{
				956	return EMULATE_FAIL;
				957	}
				958
				959	int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
				960	{
				961	return EMULATE_FAIL;
				962	}
				963
				964	int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
				965	{
				966	return EMULATE_FAIL;
				967	}
				968
				969	static int kvmppc_book3s_hv_init(void)
				970	{
				971	int r;
				972
				973	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
				974
				975	if (r)
				976	return r;
				977
				978	r = kvmppc_mmu_hv_init();
				979
				980	return r;
				981	}
				982
				983	static void kvmppc_book3s_hv_exit(void)
				984	{
				985	kvm_exit();
				986	}
				987
				988	module_init(kvmppc_book3s_hv_init);
				989	module_exit(kvmppc_book3s_hv_exit);