Blame - arch/arm/kvm/mmu.c - kernel/msm-4.19

blob: a4b7b0f900e5da8141500cabe62ffb0f7cbd7149 [file] [log] [blame]

Christoffer Dall	749cf76c	2013-01-20 18:28:06 -0500	[diff] [blame]	1	/*
				2	* Copyright (C) 2012 - Virtual Open Systems and Columbia University
				3	* Author: Christoffer Dall <c.dall@virtualopensystems.com>
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License, version 2, as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write to the Free Software
				16	* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
				17	*/
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	18
				19	#include <linux/mman.h>
				20	#include <linux/kvm_host.h>
				21	#include <linux/io.h>
				22	#include <asm/idmap.h>
				23	#include <asm/pgalloc.h>
Christoffer Dall	94f8e64	2013-01-20 18:28:12 -0500	[diff] [blame^]	24	#include <asm/cacheflush.h>
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	25	#include <asm/kvm_arm.h>
				26	#include <asm/kvm_mmu.h>
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	27	#include <asm/kvm_asm.h>
Christoffer Dall	94f8e64	2013-01-20 18:28:12 -0500	[diff] [blame^]	28	#include <asm/kvm_emulate.h>
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	29	#include <asm/mach/map.h>
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	30	#include <trace/events/kvm.h>
				31
				32	#include "trace.h"
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	33
				34	extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
				35
				36	static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
				37
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	38	static void kvm_tlb_flush_vmid(struct kvm *kvm)
				39	{
				40	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
				41	}
				42
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	43	static void kvm_set_pte(pte_t *pte, pte_t new_pte)
				44	{
				45	pte_val(*pte) = new_pte;
				46	/*
				47	* flush_pmd_entry just takes a void pointer and cleans the necessary
				48	* cache entries, so we can reuse the function for ptes.
				49	*/
				50	flush_pmd_entry(pte);
				51	}
				52
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	53	static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				54	int min, int max)
				55	{
				56	void *page;
				57
				58	BUG_ON(max > KVM_NR_MEM_OBJS);
				59	if (cache->nobjs >= min)
				60	return 0;
				61	while (cache->nobjs < max) {
				62	page = (void *)__get_free_page(PGALLOC_GFP);
				63	if (!page)
				64	return -ENOMEM;
				65	cache->objects[cache->nobjs++] = page;
				66	}
				67	return 0;
				68	}
				69
				70	static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
				71	{
				72	while (mc->nobjs)
				73	free_page((unsigned long)mc->objects[--mc->nobjs]);
				74	}
				75
				76	static void mmu_memory_cache_alloc(struct kvm_mmu_memory_cache mc)
				77	{
				78	void *p;
				79
				80	BUG_ON(!mc \|\| !mc->nobjs);
				81	p = mc->objects[--mc->nobjs];
				82	return p;
				83	}
				84
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	85	static void free_ptes(pmd_t *pmd, unsigned long addr)
				86	{
				87	pte_t *pte;
				88	unsigned int i;
				89
				90	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
				91	if (!pmd_none(pmd) && pmd_table(pmd)) {
				92	pte = pte_offset_kernel(pmd, addr);
				93	pte_free_kernel(NULL, pte);
				94	}
				95	pmd++;
				96	}
				97	}
				98
				99	/**
				100	* free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
				101	*
				102	* Assumes this is a page table used strictly in Hyp-mode and therefore contains
				103	* only mappings in the kernel memory area, which is above PAGE_OFFSET.
				104	*/
				105	void free_hyp_pmds(void)
				106	{
				107	pgd_t *pgd;
				108	pud_t *pud;
				109	pmd_t *pmd;
				110	unsigned long addr;
				111
				112	mutex_lock(&kvm_hyp_pgd_mutex);
				113	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
				114	pgd = hyp_pgd + pgd_index(addr);
				115	pud = pud_offset(pgd, addr);
				116
				117	if (pud_none(*pud))
				118	continue;
				119	BUG_ON(pud_bad(*pud));
				120
				121	pmd = pmd_offset(pud, addr);
				122	free_ptes(pmd, addr);
				123	pmd_free(NULL, pmd);
				124	pud_clear(pud);
				125	}
				126	mutex_unlock(&kvm_hyp_pgd_mutex);
				127	}
				128
				129	static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				130	unsigned long end)
				131	{
				132	pte_t *pte;
				133	unsigned long addr;
				134	struct page *page;
				135
				136	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
				137	pte = pte_offset_kernel(pmd, addr);
				138	BUG_ON(!virt_addr_valid(addr));
				139	page = virt_to_page(addr);
				140	kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
				141	}
				142	}
				143
				144	static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
				145	unsigned long end,
				146	unsigned long *pfn_base)
				147	{
				148	pte_t *pte;
				149	unsigned long addr;
				150
				151	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
				152	pte = pte_offset_kernel(pmd, addr);
				153	BUG_ON(pfn_valid(*pfn_base));
				154	kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
				155	(*pfn_base)++;
				156	}
				157	}
				158
				159	static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				160	unsigned long end, unsigned long *pfn_base)
				161	{
				162	pmd_t *pmd;
				163	pte_t *pte;
				164	unsigned long addr, next;
				165
				166	for (addr = start; addr < end; addr = next) {
				167	pmd = pmd_offset(pud, addr);
				168
				169	BUG_ON(pmd_sect(*pmd));
				170
				171	if (pmd_none(*pmd)) {
				172	pte = pte_alloc_one_kernel(NULL, addr);
				173	if (!pte) {
				174	kvm_err("Cannot allocate Hyp pte\n");
				175	return -ENOMEM;
				176	}
				177	pmd_populate_kernel(NULL, pmd, pte);
				178	}
				179
				180	next = pmd_addr_end(addr, end);
				181
				182	/*
				183	* If pfn_base is NULL, we map kernel pages into HYP with the
				184	* virtual address. Otherwise, this is considered an I/O
				185	* mapping and we map the physical region starting at
				186	* *pfn_base to [start, end[.
				187	*/
				188	if (!pfn_base)
				189	create_hyp_pte_mappings(pmd, addr, next);
				190	else
				191	create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
				192	}
				193
				194	return 0;
				195	}
				196
				197	static int __create_hyp_mappings(void from, void to, unsigned long *pfn_base)
				198	{
				199	unsigned long start = (unsigned long)from;
				200	unsigned long end = (unsigned long)to;
				201	pgd_t *pgd;
				202	pud_t *pud;
				203	pmd_t *pmd;
				204	unsigned long addr, next;
				205	int err = 0;
				206
				207	BUG_ON(start > end);
				208	if (start < PAGE_OFFSET)
				209	return -EINVAL;
				210
				211	mutex_lock(&kvm_hyp_pgd_mutex);
				212	for (addr = start; addr < end; addr = next) {
				213	pgd = hyp_pgd + pgd_index(addr);
				214	pud = pud_offset(pgd, addr);
				215
				216	if (pud_none_or_clear_bad(pud)) {
				217	pmd = pmd_alloc_one(NULL, addr);
				218	if (!pmd) {
				219	kvm_err("Cannot allocate Hyp pmd\n");
				220	err = -ENOMEM;
				221	goto out;
				222	}
				223	pud_populate(NULL, pud, pmd);
				224	}
				225
				226	next = pgd_addr_end(addr, end);
				227	err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
				228	if (err)
				229	goto out;
				230	}
				231	out:
				232	mutex_unlock(&kvm_hyp_pgd_mutex);
				233	return err;
				234	}
				235
				236	/**
				237	* create_hyp_mappings - map a kernel virtual address range in Hyp mode
				238	* @from: The virtual kernel start address of the range
				239	* @to: The virtual kernel end address of the range (exclusive)
				240	*
				241	* The same virtual address as the kernel virtual address is also used in
				242	* Hyp-mode mapping to the same underlying physical pages.
				243	*
				244	* Note: Wrapping around zero in the "to" address is not supported.
				245	*/
				246	int create_hyp_mappings(void from, void to)
				247	{
				248	return __create_hyp_mappings(from, to, NULL);
				249	}
				250
				251	/**
				252	* create_hyp_io_mappings - map a physical IO range in Hyp mode
				253	* @from: The virtual HYP start address of the range
				254	* @to: The virtual HYP end address of the range (exclusive)
				255	* @addr: The physical start address which gets mapped
				256	*/
				257	int create_hyp_io_mappings(void from, void to, phys_addr_t addr)
				258	{
				259	unsigned long pfn = __phys_to_pfn(addr);
				260	return __create_hyp_mappings(from, to, &pfn);
				261	}
				262
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	263	/**
				264	* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
				265	* @kvm: The KVM struct pointer for the VM.
				266	*
				267	* Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
				268	* support either full 40-bit input addresses or limited to 32-bit input
				269	* addresses). Clears the allocated pages.
				270	*
				271	* Note we don't need locking here as this is only called when the VM is
				272	* created, which can only be done once.
				273	*/
				274	int kvm_alloc_stage2_pgd(struct kvm *kvm)
				275	{
				276	pgd_t *pgd;
				277
				278	if (kvm->arch.pgd != NULL) {
				279	kvm_err("kvm_arch already initialized?\n");
				280	return -EINVAL;
				281	}
				282
				283	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
				284	if (!pgd)
				285	return -ENOMEM;
				286
				287	/* stage-2 pgd must be aligned to its size */
				288	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
				289
				290	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
				291	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
				292	kvm->arch.pgd = pgd;
				293
				294	return 0;
				295	}
				296
				297	static void clear_pud_entry(pud_t *pud)
				298	{
				299	pmd_t *pmd_table = pmd_offset(pud, 0);
				300	pud_clear(pud);
				301	pmd_free(NULL, pmd_table);
				302	put_page(virt_to_page(pud));
				303	}
				304
				305	static void clear_pmd_entry(pmd_t *pmd)
				306	{
				307	pte_t *pte_table = pte_offset_kernel(pmd, 0);
				308	pmd_clear(pmd);
				309	pte_free_kernel(NULL, pte_table);
				310	put_page(virt_to_page(pmd));
				311	}
				312
				313	static bool pmd_empty(pmd_t *pmd)
				314	{
				315	struct page *pmd_page = virt_to_page(pmd);
				316	return page_count(pmd_page) == 1;
				317	}
				318
				319	static void clear_pte_entry(pte_t *pte)
				320	{
				321	if (pte_present(*pte)) {
				322	kvm_set_pte(pte, __pte(0));
				323	put_page(virt_to_page(pte));
				324	}
				325	}
				326
				327	static bool pte_empty(pte_t *pte)
				328	{
				329	struct page *pte_page = virt_to_page(pte);
				330	return page_count(pte_page) == 1;
				331	}
				332
				333	/**
				334	* unmap_stage2_range -- Clear stage2 page table entries to unmap a range
				335	* @kvm: The VM pointer
				336	* @start: The intermediate physical base address of the range to unmap
				337	* @size: The size of the area to unmap
				338	*
				339	* Clear a range of stage-2 mappings, lowering the various ref-counts. Must
				340	* be called while holding mmu_lock (unless for freeing the stage2 pgd before
				341	* destroying the VM), otherwise another faulting VCPU may come in and mess
				342	* with things behind our backs.
				343	*/
				344	static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
				345	{
				346	pgd_t *pgd;
				347	pud_t *pud;
				348	pmd_t *pmd;
				349	pte_t *pte;
				350	phys_addr_t addr = start, end = start + size;
				351	u64 range;
				352
				353	while (addr < end) {
				354	pgd = kvm->arch.pgd + pgd_index(addr);
				355	pud = pud_offset(pgd, addr);
				356	if (pud_none(*pud)) {
				357	addr += PUD_SIZE;
				358	continue;
				359	}
				360
				361	pmd = pmd_offset(pud, addr);
				362	if (pmd_none(*pmd)) {
				363	addr += PMD_SIZE;
				364	continue;
				365	}
				366
				367	pte = pte_offset_kernel(pmd, addr);
				368	clear_pte_entry(pte);
				369	range = PAGE_SIZE;
				370
				371	/* If we emptied the pte, walk back up the ladder */
				372	if (pte_empty(pte)) {
				373	clear_pmd_entry(pmd);
				374	range = PMD_SIZE;
				375	if (pmd_empty(pmd)) {
				376	clear_pud_entry(pud);
				377	range = PUD_SIZE;
				378	}
				379	}
				380
				381	addr += range;
				382	}
				383	}
				384
				385	/**
				386	* kvm_free_stage2_pgd - free all stage-2 tables
				387	* @kvm: The KVM struct pointer for the VM.
				388	*
				389	* Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
				390	* underlying level-2 and level-3 tables before freeing the actual level-1 table
				391	* and setting the struct pointer to NULL.
				392	*
				393	* Note we don't need locking here as this is only called when the VM is
				394	* destroyed, which can only be done once.
				395	*/
				396	void kvm_free_stage2_pgd(struct kvm *kvm)
				397	{
				398	if (kvm->arch.pgd == NULL)
				399	return;
				400
				401	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
				402	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
				403	kvm->arch.pgd = NULL;
				404	}
				405
				406
				407	static int stage2_set_pte(struct kvm kvm, struct kvm_mmu_memory_cache cache,
				408	phys_addr_t addr, const pte_t *new_pte, bool iomap)
				409	{
				410	pgd_t *pgd;
				411	pud_t *pud;
				412	pmd_t *pmd;
				413	pte_t *pte, old_pte;
				414
				415	/* Create 2nd stage page table mapping - Level 1 */
				416	pgd = kvm->arch.pgd + pgd_index(addr);
				417	pud = pud_offset(pgd, addr);
				418	if (pud_none(*pud)) {
				419	if (!cache)
				420	return 0; /* ignore calls from kvm_set_spte_hva */
				421	pmd = mmu_memory_cache_alloc(cache);
				422	pud_populate(NULL, pud, pmd);
				423	pmd += pmd_index(addr);
				424	get_page(virt_to_page(pud));
				425	} else
				426	pmd = pmd_offset(pud, addr);
				427
				428	/* Create 2nd stage page table mapping - Level 2 */
				429	if (pmd_none(*pmd)) {
				430	if (!cache)
				431	return 0; /* ignore calls from kvm_set_spte_hva */
				432	pte = mmu_memory_cache_alloc(cache);
				433	clean_pte_table(pte);
				434	pmd_populate_kernel(NULL, pmd, pte);
				435	pte += pte_index(addr);
				436	get_page(virt_to_page(pmd));
				437	} else
				438	pte = pte_offset_kernel(pmd, addr);
				439
				440	if (iomap && pte_present(*pte))
				441	return -EFAULT;
				442
				443	/* Create 2nd stage page table mapping - Level 3 */
				444	old_pte = *pte;
				445	kvm_set_pte(pte, *new_pte);
				446	if (pte_present(old_pte))
				447	kvm_tlb_flush_vmid(kvm);
				448	else
				449	get_page(virt_to_page(pte));
				450
				451	return 0;
				452	}
				453
				454	/**
				455	* kvm_phys_addr_ioremap - map a device range to guest IPA
				456	*
				457	* @kvm: The KVM pointer
				458	* @guest_ipa: The IPA at which to insert the mapping
				459	* @pa: The physical address of the device
				460	* @size: The size of the mapping
				461	*/
				462	int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
				463	phys_addr_t pa, unsigned long size)
				464	{
				465	phys_addr_t addr, end;
				466	int ret = 0;
				467	unsigned long pfn;
				468	struct kvm_mmu_memory_cache cache = { 0, };
				469
				470	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
				471	pfn = __phys_to_pfn(pa);
				472
				473	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
				474	pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE \| L_PTE_S2_RDWR);
				475
				476	ret = mmu_topup_memory_cache(&cache, 2, 2);
				477	if (ret)
				478	goto out;
				479	spin_lock(&kvm->mmu_lock);
				480	ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
				481	spin_unlock(&kvm->mmu_lock);
				482	if (ret)
				483	goto out;
				484
				485	pfn++;
				486	}
				487
				488	out:
				489	mmu_free_memory_cache(&cache);
				490	return ret;
				491	}
				492
Christoffer Dall	94f8e64	2013-01-20 18:28:12 -0500	[diff] [blame^]	493	static void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
				494	{
				495	/*
				496	* If we are going to insert an instruction page and the icache is
				497	* either VIPT or PIPT, there is a potential problem where the host
				498	* (or another VM) may have used the same page as this guest, and we
				499	* read incorrect data from the icache. If we're using a PIPT cache,
				500	* we can invalidate just that page, but if we are using a VIPT cache
				501	* we need to invalidate the entire icache - damn shame - as written
				502	* in the ARM ARM (DDI 0406C.b - Page B3-1393).
				503	*
				504	* VIVT caches are tagged using both the ASID and the VMID and doesn't
				505	* need any kind of flushing (DDI 0406C.b - Page B3-1392).
				506	*/
				507	if (icache_is_pipt()) {
				508	unsigned long hva = gfn_to_hva(kvm, gfn);
				509	__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
				510	} else if (!icache_is_vivt_asid_tagged()) {
				511	/* any kind of VIPT cache */
				512	__flush_icache_all();
				513	}
				514	}
				515
				516	static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
				517	gfn_t gfn, struct kvm_memory_slot *memslot,
				518	unsigned long fault_status)
				519	{
				520	pte_t new_pte;
				521	pfn_t pfn;
				522	int ret;
				523	bool write_fault, writable;
				524	unsigned long mmu_seq;
				525	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
				526
				527	write_fault = kvm_is_write_fault(vcpu->arch.hsr);
				528	if (fault_status == FSC_PERM && !write_fault) {
				529	kvm_err("Unexpected L2 read permission error\n");
				530	return -EFAULT;
				531	}
				532
				533	/* We need minimum second+third level pages */
				534	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
				535	if (ret)
				536	return ret;
				537
				538	mmu_seq = vcpu->kvm->mmu_notifier_seq;
				539	/*
				540	* Ensure the read of mmu_notifier_seq happens before we call
				541	* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
				542	* the page we just got a reference to gets unmapped before we have a
				543	* chance to grab the mmu_lock, which ensure that if the page gets
				544	* unmapped afterwards, the call to kvm_unmap_hva will take it away
				545	* from us again properly. This smp_rmb() interacts with the smp_wmb()
				546	* in kvm_mmu_notifier_invalidate_<page\|range_end>.
				547	*/
				548	smp_rmb();
				549
				550	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
				551	if (is_error_pfn(pfn))
				552	return -EFAULT;
				553
				554	new_pte = pfn_pte(pfn, PAGE_S2);
				555	coherent_icache_guest_page(vcpu->kvm, gfn);
				556
				557	spin_lock(&vcpu->kvm->mmu_lock);
				558	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
				559	goto out_unlock;
				560	if (writable) {
				561	pte_val(new_pte) \|= L_PTE_S2_RDWR;
				562	kvm_set_pfn_dirty(pfn);
				563	}
				564	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
				565
				566	out_unlock:
				567	spin_unlock(&vcpu->kvm->mmu_lock);
				568	kvm_release_pfn_clean(pfn);
				569	return 0;
				570	}
				571
				572	/**
				573	* kvm_handle_guest_abort - handles all 2nd stage aborts
				574	* @vcpu: the VCPU pointer
				575	* @run: the kvm_run structure
				576	*
				577	* Any abort that gets to the host is almost guaranteed to be caused by a
				578	* missing second stage translation table entry, which can mean that either the
				579	* guest simply needs more memory and we must allocate an appropriate page or it
				580	* can mean that the guest tried to access I/O memory, which is emulated by user
				581	* space. The distinction is based on the IPA causing the fault and whether this
				582	* memory region has been registered as standard RAM by user space.
				583	*/
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	584	int kvm_handle_guest_abort(struct kvm_vcpu vcpu, struct kvm_run run)
				585	{
Christoffer Dall	94f8e64	2013-01-20 18:28:12 -0500	[diff] [blame^]	586	unsigned long hsr_ec;
				587	unsigned long fault_status;
				588	phys_addr_t fault_ipa;
				589	struct kvm_memory_slot *memslot;
				590	bool is_iabt;
				591	gfn_t gfn;
				592	int ret, idx;
				593
				594	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
				595	is_iabt = (hsr_ec == HSR_EC_IABT);
				596	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
				597
				598	trace_kvm_guest_fault(*vcpu_pc(vcpu), vcpu->arch.hsr,
				599	vcpu->arch.hxfar, fault_ipa);
				600
				601	/* Check the stage-2 fault is trans. fault or write fault */
				602	fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
				603	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
				604	kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
				605	hsr_ec, fault_status);
				606	return -EFAULT;
				607	}
				608
				609	idx = srcu_read_lock(&vcpu->kvm->srcu);
				610
				611	gfn = fault_ipa >> PAGE_SHIFT;
				612	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
				613	if (is_iabt) {
				614	/* Prefetch Abort on I/O address */
				615	kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
				616	ret = 1;
				617	goto out_unlock;
				618	}
				619
				620	if (fault_status != FSC_FAULT) {
				621	kvm_err("Unsupported fault status on io memory: %#lx\n",
				622	fault_status);
				623	ret = -EFAULT;
				624	goto out_unlock;
				625	}
				626
				627	kvm_pr_unimpl("I/O address abort...");
				628	ret = 0;
				629	goto out_unlock;
				630	}
				631
				632	memslot = gfn_to_memslot(vcpu->kvm, gfn);
				633	if (!memslot->user_alloc) {
				634	kvm_err("non user-alloc memslots not supported\n");
				635	ret = -EINVAL;
				636	goto out_unlock;
				637	}
				638
				639	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
				640	if (ret == 0)
				641	ret = 1;
				642	out_unlock:
				643	srcu_read_unlock(&vcpu->kvm->srcu, idx);
				644	return ret;
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	645	}
				646
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	647	static void handle_hva_to_gpa(struct kvm *kvm,
				648	unsigned long start,
				649	unsigned long end,
				650	void (handler)(struct kvm kvm,
				651	gpa_t gpa, void *data),
				652	void *data)
				653	{
				654	struct kvm_memslots *slots;
				655	struct kvm_memory_slot *memslot;
				656
				657	slots = kvm_memslots(kvm);
				658
				659	/* we only care about the pages that the guest sees */
				660	kvm_for_each_memslot(memslot, slots) {
				661	unsigned long hva_start, hva_end;
				662	gfn_t gfn, gfn_end;
				663
				664	hva_start = max(start, memslot->userspace_addr);
				665	hva_end = min(end, memslot->userspace_addr +
				666	(memslot->npages << PAGE_SHIFT));
				667	if (hva_start >= hva_end)
				668	continue;
				669
				670	/*
				671	* {gfn(page) \| page intersects with [hva_start, hva_end)} =
				672	* {gfn_start, gfn_start+1, ..., gfn_end-1}.
				673	*/
				674	gfn = hva_to_gfn_memslot(hva_start, memslot);
				675	gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
				676
				677	for (; gfn < gfn_end; ++gfn) {
				678	gpa_t gpa = gfn << PAGE_SHIFT;
				679	handler(kvm, gpa, data);
				680	}
				681	}
				682	}
				683
				684	static void kvm_unmap_hva_handler(struct kvm kvm, gpa_t gpa, void data)
				685	{
				686	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
				687	kvm_tlb_flush_vmid(kvm);
				688	}
				689
				690	int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
				691	{
				692	unsigned long end = hva + PAGE_SIZE;
				693
				694	if (!kvm->arch.pgd)
				695	return 0;
				696
				697	trace_kvm_unmap_hva(hva);
				698	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
				699	return 0;
				700	}
				701
				702	int kvm_unmap_hva_range(struct kvm *kvm,
				703	unsigned long start, unsigned long end)
				704	{
				705	if (!kvm->arch.pgd)
				706	return 0;
				707
				708	trace_kvm_unmap_hva_range(start, end);
				709	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
				710	return 0;
				711	}
				712
				713	static void kvm_set_spte_handler(struct kvm kvm, gpa_t gpa, void data)
				714	{
				715	pte_t pte = (pte_t )data;
				716
				717	stage2_set_pte(kvm, NULL, gpa, pte, false);
				718	}
				719
				720
				721	void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
				722	{
				723	unsigned long end = hva + PAGE_SIZE;
				724	pte_t stage2_pte;
				725
				726	if (!kvm->arch.pgd)
				727	return;
				728
				729	trace_kvm_set_spte_hva(hva);
				730	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
				731	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
				732	}
				733
				734	void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
				735	{
				736	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
				737	}
				738
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	739	phys_addr_t kvm_mmu_get_httbr(void)
				740	{
				741	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
				742	return virt_to_phys(hyp_pgd);
				743	}
				744
				745	int kvm_mmu_init(void)
				746	{
Christoffer Dall	d5d8184	2013-01-20 18:28:07 -0500	[diff] [blame]	747	if (!hyp_pgd) {
				748	kvm_err("Hyp mode PGD not allocated\n");
				749	return -ENOMEM;
				750	}
				751
				752	return 0;
Christoffer Dall	342cd0a	2013-01-20 18:28:06 -0500	[diff] [blame]	753	}
				754
				755	/**
				756	* kvm_clear_idmap - remove all idmaps from the hyp pgd
				757	*
				758	* Free the underlying pmds for all pgds in range and clear the pgds (but
				759	* don't free them) afterwards.
				760	*/
				761	void kvm_clear_hyp_idmap(void)
				762	{
				763	unsigned long addr, end;
				764	unsigned long next;
				765	pgd_t *pgd = hyp_pgd;
				766	pud_t *pud;
				767	pmd_t *pmd;
				768
				769	addr = virt_to_phys(__hyp_idmap_text_start);
				770	end = virt_to_phys(__hyp_idmap_text_end);
				771
				772	pgd += pgd_index(addr);
				773	do {
				774	next = pgd_addr_end(addr, end);
				775	if (pgd_none_or_clear_bad(pgd))
				776	continue;
				777	pud = pud_offset(pgd, addr);
				778	pmd = pmd_offset(pud, addr);
				779
				780	pud_clear(pud);
				781	clean_pmd_entry(pmd);
				782	pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
				783	} while (pgd++, addr = next, addr < end);
				784	}