/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

#define K(x) ((x) << (PAGE_SHIFT-10))

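/*
 * Illustrative only: K() converts a page count to kilobytes, so with
 * 64 KB pages (PAGE_SHIFT == 16) K(1) == 64, and with 4 KB pages
 * (PAGE_SHIFT == 12) K(1) == 4.
 */
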
/*
 * The normal show_free_areas() is too verbose on Tile, with dozens
 * of processors and often four NUMA zones each with high and lowmem.
 */
void show_mem(unsigned int filter)
{
	struct zone *zone;

	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
	       (global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE)),
	       (global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE)),
	       global_page_state(NR_FILE_DIRTY),
	       global_page_state(NR_WRITEBACK),
	       global_page_state(NR_UNSTABLE_NFS),
	       global_page_state(NR_FREE_PAGES),
	       (global_page_state(NR_SLAB_RECLAIMABLE) +
		global_page_state(NR_SLAB_UNRECLAIMABLE)),
	       global_page_state(NR_FILE_MAPPED),
	       global_page_state(NR_PAGETABLE),
	       global_page_state(NR_BOUNCE),
	       global_page_state(NR_FILE_PAGES),
	       get_nr_swap_pages());

	for_each_zone(zone) {
		unsigned long flags, order, total = 0, largest_order = -1;

		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		for (order = 0; order < MAX_ORDER; order++) {
			int nr = zone->free_area[order].nr_free;
			total += nr << order;
			if (nr)
				largest_order = order;
		}
		spin_unlock_irqrestore(&zone->lock, flags);
		pr_err("Node %d %7s: %lukB (largest %luKb)\n",
		       zone_to_nid(zone), zone->name,
		       K(total), largest_order ? K(1UL) << largest_order : 0);
	}
}

/**
 * shatter_huge_page() - ensure a given address is mapped by a small page.
 *
 * This function converts a huge PTE mapping kernel LOWMEM into a bunch
 * of small PTEs with the same caching.  No cache flush required, but we
 * must do a global TLB flush.
 *
 * Any caller that wishes to modify a kernel mapping that might
 * have been made with a huge page should call this function,
 * since doing so properly avoids race conditions with installing the
 * newly-shattered page and then flushing all the TLB entries.
 *
 * @addr: Address at which to shatter any existing huge page.
 */
void shatter_huge_page(unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long flags = 0;  /* happy compiler */
#ifdef __PAGETABLE_PMD_FOLDED
	struct list_head *pos;
#endif

	/* Get a pointer to the pmd entry that we need to change. */
	addr &= HPAGE_MASK;
	BUG_ON(pgd_addr_invalid(addr));
	BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
	pgd = swapper_pg_dir + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	BUG_ON(!pud_present(*pud));
	pmd = pmd_offset(pud, addr);
	BUG_ON(!pmd_present(*pmd));
	if (!pmd_huge_page(*pmd))
		return;

	spin_lock_irqsave(&init_mm.page_table_lock, flags);
	if (!pmd_huge_page(*pmd)) {
		/* Lost the race to convert the huge page. */
		spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
		return;
	}

	/* Shatter the huge page into the preallocated L2 page table. */
	pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd)));

#ifdef __PAGETABLE_PMD_FOLDED
	/* Walk every pgd on the system and update the pmd there. */
	spin_lock(&pgd_lock);
	list_for_each(pos, &pgd_list) {
		pmd_t *copy_pmd;
		pgd = list_to_pgd(pos) + pgd_index(addr);
		pud = pud_offset(pgd, addr);
		copy_pmd = pmd_offset(pud, addr);
		__set_pmd(copy_pmd, *pmd);
	}
	spin_unlock(&pgd_lock);
#endif

	/* Tell every cpu to notice the change. */
	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
		     cpu_possible_mask, NULL, 0);

	/* Hold the lock until the TLB flush is finished to avoid races. */
	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
}

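/*
 * Illustrative caller pattern only (not an in-tree example): code that
 * wants to modify one small-page kernel mapping inside a region that may
 * currently be covered by a huge page might do something like:
 *
 *	shatter_huge_page(addr);
 *	ptep = virt_to_kpte(addr);
 *	set_pte(ptep, pfn_pte(pte_pfn(*ptep), new_prot));
 *
 * where "new_prot" is a hypothetical pgprot chosen by the caller.
 */
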
/*
 * List of all pgd's needed so it can invalidate entries in both cached
 * and uncached pgd's.  This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 *
 * The lock is always taken with interrupts disabled, unlike on x86
 * and other platforms, because we need to take the lock in
 * shatter_huge_page(), which may be called from an interrupt context.
 * We are not at risk from the tlbflush IPI deadlock that was seen on
 * x86, since we use the flush_remote() API to have the hypervisor do
 * the TLB flushes regardless of irq disabling.
 */
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

static inline void pgd_list_add(pgd_t *pgd)
{
	list_add(pgd_to_list(pgd), &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	list_del(pgd_to_list(pgd));
}

#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)

static void pgd_ctor(pgd_t *pgd)
{
	unsigned long flags;

	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
	spin_lock_irqsave(&pgd_lock, flags);

#ifndef __tilegx__
	/*
	 * Check that the user interrupt vector has no L2.
	 * It never should for the swapper, and new page tables
	 * should always start with an empty user interrupt vector.
	 */
	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
#endif

	memcpy(pgd + KERNEL_PGD_INDEX_START,
	       swapper_pg_dir + KERNEL_PGD_INDEX_START,
	       KERNEL_PGD_PTRS * sizeof(pgd_t));

	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

static void pgd_dtor(pgd_t *pgd)
{
	unsigned long flags; /* can be called from interrupt context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
	if (pgd)
		pgd_ctor(pgd);
	return pgd;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_dtor(pgd);
	kmem_cache_free(pgd_cache, pgd);
}


#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)

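/*
 * Illustrative only: if L2_USER_PGTABLE_ORDER were 2, then
 * L2_USER_PGTABLE_PAGES would be 4, i.e. each L2 user page table would
 * occupy four consecutive struct pages.
 */
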
struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
			       int order)
{
	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
	struct page *p;
	int i;

	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
	if (p == NULL)
		return NULL;

	if (!pgtable_page_ctor(p)) {
		__free_pages(p, L2_USER_PGTABLE_ORDER);
		return NULL;
	}

	/*
	 * Make every page have a page_count() of one, not just the first.
	 * We don't use __GFP_COMP since it doesn't look like it works
	 * correctly with tlb_remove_page().
	 */
	for (i = 1; i < order; ++i) {
		init_page_count(p+i);
		inc_zone_page_state(p+i, NR_PAGETABLE);
	}

	return p;
}

/*
 * Free page immediately (used in __pte_alloc if we raced with another
 * process).  We have to correct whatever pte_alloc_one() did before
 * returning the pages to the allocator.
 */
void pgtable_free(struct mm_struct *mm, struct page *p, int order)
{
	int i;

	pgtable_page_dtor(p);
	__free_page(p);

	for (i = 1; i < order; ++i) {
		__free_page(p+i);
		dec_zone_page_state(p+i, NR_PAGETABLE);
	}
}

void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
			unsigned long address, int order)
{
	int i;

	pgtable_page_dtor(pte);
	tlb_remove_page(tlb, pte);

	for (i = 1; i < order; ++i) {
		tlb_remove_page(tlb, pte + i);
		dec_zone_page_state(pte + i, NR_PAGETABLE);
	}
}

#ifndef __tilegx__

/*
 * FIXME: needs to be atomic vs hypervisor writes.  For now we make the
 * window of vulnerability a bit smaller by doing an unlocked 8-bit update.
 */
int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16
# error Code assumes HV_PTE "accessed" bit in second byte
#endif
	u8 *tmp = (u8 *)ptep;
	u8 second_byte = tmp[1];
	if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
		return 0;
	tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
	return 1;
}

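/*
 * Illustrative only: if HV_PTE_INDEX_ACCESSED were, say, 12, the code
 * above would test and clear bit 4 of byte 1 of the PTE, i.e. a single
 * non-atomic read-modify-write with mask 0x10 confined to that byte.
 */
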
/*
 * This implementation is atomic vs hypervisor writes, since the hypervisor
 * always writes the low word (where "accessed" and "dirty" are) and this
 * routine only writes the high word.
 */
void ptep_set_wrprotect(struct mm_struct *mm,
			unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_WRITABLE < 32
# error Code assumes HV_PTE "writable" bit in high word
#endif
	u32 *tmp = (u32 *)ptep;
	tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
}

#endif

/*
 * Return a pointer to the PTE that corresponds to the given
 * address in the given page table.  A NULL page table just uses
 * the standard kernel page table; the preferred API in this case
 * is virt_to_kpte().
 *
 * The returned pointer can point to a huge page in other levels
 * of the page table than the bottom, if the huge page is present
 * in the page table.  For bottom-level PTEs, the returned pointer
 * can point to a PTE that is either present or not.
 */
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_addr_invalid(addr))
		return NULL;

	pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;
	if (pud_huge_page(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		return NULL;
	if (pmd_huge_page(*pmd))
		return (pte_t *)pmd;
	return pte_offset_kernel(pmd, addr);
}
EXPORT_SYMBOL(virt_to_pte);

pte_t *virt_to_kpte(unsigned long kaddr)
{
	BUG_ON(kaddr < PAGE_OFFSET);
	return virt_to_pte(NULL, kaddr);
}
EXPORT_SYMBOL(virt_to_kpte);

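/*
 * Illustrative only: a caller that wants to inspect the mapping of a
 * kernel LOWMEM address might do something like
 *
 *	pte_t *ptep = virt_to_kpte(kaddr);
 *	if (ptep && pte_present(*ptep))
 *		pfn = pte_pfn(*ptep);
 *
 * remembering that the returned pointer may reference a huge-page entry
 * at the PMD or PUD level rather than a bottom-level PTE.
 */
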
pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
{
	unsigned int width = smp_width;
	int x = cpu % width;
	int y = cpu / width;
	BUG_ON(y >= smp_height);
	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
	BUG_ON(cpu < 0 || cpu >= NR_CPUS);
	BUG_ON(!cpu_is_valid_lotar(cpu));
	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
}

int get_remote_cache_cpu(pgprot_t prot)
{
	HV_LOTAR lotar = hv_pte_get_lotar(prot);
	int x = HV_LOTAR_X(lotar);
	int y = HV_LOTAR_Y(lotar);
	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
	return x + y * smp_width;
}

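/*
 * Illustrative only: on a hypothetical 8x8 mesh (smp_width == 8), cpu 10
 * decomposes above to x = 10 % 8 = 2, y = 10 / 8 = 1, and
 * get_remote_cache_cpu() recovers 2 + 1 * 8 = 10 from the lotar.
 */
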
/*
 * Convert a kernel VA to a PA and homing information.
 */
int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
{
	struct page *page = virt_to_page(va);
	pte_t null_pte = { 0 };

	*cpa = __pa(va);

	/* Note that this is not writing a page table, just returning a pte. */
	*pte = pte_set_home(null_pte, page_home(page));

	return 0; /* return non-zero if not hfh? */
}
EXPORT_SYMBOL(va_to_cpa_and_pte);

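/*
 * On 32-bit tilepro (the !__tilegx__ case below), the 64-bit PTE is
 * written as two 32-bit words, and the "present" and "migrating" bits
 * live in the low word.  __set_pte() therefore orders its two stores so
 * a concurrent walker never sees an entry that is marked present with a
 * stale other half: the high word goes first when installing a present
 * PTE, and the low word (clearing "present") goes first when tearing
 * one down.
 */
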
void __set_pte(pte_t *ptep, pte_t pte)
{
#ifdef __tilegx__
	*ptep = pte;
#else
# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
# error Must write the present and migrating bits last
# endif
	if (pte_present(pte)) {
		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
		barrier();
		((u32 *)ptep)[0] = (u32)(pte_val(pte));
	} else {
		((u32 *)ptep)[0] = (u32)(pte_val(pte));
		barrier();
		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
	}
#endif /* __tilegx__ */
}

void set_pte(pte_t *ptep, pte_t pte)
{
	if (pte_present(pte) &&
	    (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
		/* The PTE actually references physical memory. */
		unsigned long pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			/* Update the home of the PTE from the struct page. */
			pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
		} else if (hv_pte_get_mode(pte) == 0) {
			/* remap_pfn_range(), etc, must supply PTE mode. */
			panic("set_pte(): out-of-range PFN and mode 0\n");
		}
	}

	__set_pte(ptep, pte);
}

/* Can this mm load a PTE with cached_priority set? */
static inline int mm_is_priority_cached(struct mm_struct *mm)
{
	return mm->context.priority_cached != 0;
}

/*
 * Add a priority mapping to an mm_context and
 * notify the hypervisor if this is the first one.
 */
void start_mm_caching(struct mm_struct *mm)
{
	if (!mm_is_priority_cached(mm)) {
		mm->context.priority_cached = -1UL;
		hv_set_caching(-1UL);
	}
}

/*
 * Validate and return the priority_cached flag.  We know if it's zero
 * that we don't need to scan, since we immediately set it non-zero
 * when we first consider a MAP_CACHE_PRIORITY mapping.
 *
 * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
 * since we're in an interrupt context (servicing switch_mm) we don't
 * worry about it and don't unset the "priority_cached" field.
 * Presumably we'll come back later and have more luck and clear
 * the value then; for now we'll just keep the cache marked for priority.
 */
static unsigned long update_priority_cached(struct mm_struct *mm)
{
	if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
		struct vm_area_struct *vm;
		for (vm = mm->mmap; vm; vm = vm->vm_next) {
			if (hv_pte_get_cached_priority(vm->vm_page_prot))
				break;
		}
		if (vm == NULL)
			mm->context.priority_cached = 0;
		up_write(&mm->mmap_sem);
	}
	return mm->context.priority_cached;
}

/* Set caching correctly for an mm that we are switching to. */
void check_mm_caching(struct mm_struct *prev, struct mm_struct *next)
{
	if (!mm_is_priority_cached(next)) {
		/*
		 * If the new mm doesn't use priority caching, just see if we
		 * need the hv_set_caching(), or can assume it's already zero.
		 */
		if (mm_is_priority_cached(prev))
			hv_set_caching(0);
	} else {
		hv_set_caching(update_priority_cached(next));
	}
}

#if CHIP_HAS_MMIO()

/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
			   pgprot_t home)
{
	void *addr;
	struct vm_struct *area;
	unsigned long offset, last_addr;
	pgprot_t pgprot;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
	pgprot = PAGE_KERNEL;
	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	addr = area->addr;
	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
			       phys_addr, pgprot)) {
		free_vm_area(area);
		return NULL;
	}
	return (__force void __iomem *) (offset + (char *)addr);
}
EXPORT_SYMBOL(ioremap_prot);

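/*
 * Illustrative only (all names here are hypothetical): a driver holding
 * a hypervisor-provided physical MMIO address and homing pgprot might
 * map and use the region roughly as follows:
 *
 *	void __iomem *regs = ioremap_prot(mmio_cpa, mmio_size, mmio_home);
 *	if (regs) {
 *		writel(value, regs + REG_OFFSET);
 *		iounmap(regs);
 *	}
 */
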
/* Unmap an MMIO VA mapping. */
void iounmap(volatile void __iomem *addr_in)
{
	volatile void __iomem *addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr_in);
#if 1
	vunmap((void * __force)addr);
#else
	/* x86 uses this complicated flow instead of vunmap().  Is
	 * there any particular reason we should do the same? */
	struct vm_struct *p, *o;

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	p = find_vm_area((void *)addr);

	if (!p) {
		pr_err("iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	/* Finally remove it */
	o = remove_vm_area((void *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
#endif
}
EXPORT_SYMBOL(iounmap);

#endif /* CHIP_HAS_MMIO() */