Blame - arch/tile/mm/pgtable.c - kernel/msm-4.9

blob: 5e86eac4bfae572da1c65ce268b8572ce91a9c42 [file] [log] [blame]

Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	1	/*
				2	* Copyright 2010 Tilera Corporation. All Rights Reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public License
				6	* as published by the Free Software Foundation, version 2.
				7	*
				8	* This program is distributed in the hope that it will be useful, but
				9	* WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
				11	* NON INFRINGEMENT. See the GNU General Public License for
				12	* more details.
				13	*/
				14
				15	#include <linux/sched.h>
				16	#include <linux/kernel.h>
				17	#include <linux/errno.h>
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	20	#include <linux/highmem.h>
				21	#include <linux/slab.h>
				22	#include <linux/pagemap.h>
				23	#include <linux/spinlock.h>
				24	#include <linux/cpumask.h>
				25	#include <linux/module.h>
				26	#include <linux/io.h>
				27	#include <linux/vmalloc.h>
				28	#include <linux/smp.h>
				29
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	30	#include <asm/pgtable.h>
				31	#include <asm/pgalloc.h>
				32	#include <asm/fixmap.h>
				33	#include <asm/tlb.h>
				34	#include <asm/tlbflush.h>
				35	#include <asm/homecache.h>
				36
				37	#define K(x) ((x) << (PAGE_SHIFT-10))
				38
				39	/*
				40	* The normal show_free_areas() is too verbose on Tile, with dozens
				41	* of processors and often four NUMA zones each with high and lowmem.
				42	*/
David Rientjes	b2b755b	2011-03-24 15:18:15 -0700	[diff] [blame]	43	void show_mem(unsigned int filter)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	44	{
				45	struct zone *zone;
				46
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	47	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	48	" free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
				49	" pagecache:%lu swap:%lu\n",
				50	(global_page_state(NR_ACTIVE_ANON) +
				51	global_page_state(NR_ACTIVE_FILE)),
				52	(global_page_state(NR_INACTIVE_ANON) +
				53	global_page_state(NR_INACTIVE_FILE)),
				54	global_page_state(NR_FILE_DIRTY),
				55	global_page_state(NR_WRITEBACK),
				56	global_page_state(NR_UNSTABLE_NFS),
				57	global_page_state(NR_FREE_PAGES),
				58	(global_page_state(NR_SLAB_RECLAIMABLE) +
				59	global_page_state(NR_SLAB_UNRECLAIMABLE)),
				60	global_page_state(NR_FILE_MAPPED),
				61	global_page_state(NR_PAGETABLE),
				62	global_page_state(NR_BOUNCE),
				63	global_page_state(NR_FILE_PAGES),
Shaohua Li	ec8acf2	2013-02-22 16:34:38 -0800	[diff] [blame]	64	get_nr_swap_pages());
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	65
				66	for_each_zone(zone) {
				67	unsigned long flags, order, total = 0, largest_order = -1;
				68
				69	if (!populated_zone(zone))
				70	continue;
				71
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	72	spin_lock_irqsave(&zone->lock, flags);
				73	for (order = 0; order < MAX_ORDER; order++) {
				74	int nr = zone->free_area[order].nr_free;
				75	total += nr << order;
				76	if (nr)
				77	largest_order = order;
				78	}
				79	spin_unlock_irqrestore(&zone->lock, flags);
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	80	pr_err("Node %d %7s: %lukB (largest %luKb)\n",
				81	zone_to_nid(zone), zone->name,
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	82	K(total), largest_order ? K(1UL) << largest_order : 0);
				83	}
				84	}
				85
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	86	/**
				87	* shatter_huge_page() - ensure a given address is mapped by a small page.
				88	*
				89	* This function converts a huge PTE mapping kernel LOWMEM into a bunch
				90	* of small PTEs with the same caching. No cache flush required, but we
				91	* must do a global TLB flush.
				92	*
				93	* Any caller that wishes to modify a kernel mapping that might
				94	* have been made with a huge page should call this function,
				95	* since doing so properly avoids race conditions with installing the
				96	* newly-shattered page and then flushing all the TLB entries.
				97	*
				98	* @addr: Address at which to shatter any existing huge page.
				99	*/
				100	void shatter_huge_page(unsigned long addr)
				101	{
				102	pgd_t *pgd;
				103	pud_t *pud;
				104	pmd_t *pmd;
				105	unsigned long flags = 0; /* happy compiler */
				106	#ifdef __PAGETABLE_PMD_FOLDED
				107	struct list_head *pos;
				108	#endif
				109
				110	/* Get a pointer to the pmd entry that we need to change. */
				111	addr &= HPAGE_MASK;
				112	BUG_ON(pgd_addr_invalid(addr));
				113	BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */
				114	pgd = swapper_pg_dir + pgd_index(addr);
				115	pud = pud_offset(pgd, addr);
				116	BUG_ON(!pud_present(*pud));
				117	pmd = pmd_offset(pud, addr);
				118	BUG_ON(!pmd_present(*pmd));
				119	if (!pmd_huge_page(*pmd))
				120	return;
				121
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	122	spin_lock_irqsave(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	123	if (!pmd_huge_page(*pmd)) {
				124	/* Lost the race to convert the huge page. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	125	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	126	return;
				127	}
				128
				129	/* Shatter the huge page into the preallocated L2 page table. */
Chris Metcalf	8629470	2013-09-13 11:14:25 -0400	[diff] [blame]	130	pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd)));
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	131
				132	#ifdef __PAGETABLE_PMD_FOLDED
				133	/* Walk every pgd on the system and update the pmd there. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	134	spin_lock(&pgd_lock);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	135	list_for_each(pos, &pgd_list) {
				136	pmd_t *copy_pmd;
				137	pgd = list_to_pgd(pos) + pgd_index(addr);
				138	pud = pud_offset(pgd, addr);
				139	copy_pmd = pmd_offset(pud, addr);
				140	__set_pmd(copy_pmd, *pmd);
				141	}
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	142	spin_unlock(&pgd_lock);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	143	#endif
				144
				145	/* Tell every cpu to notice the change. */
				146	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
				147	cpu_possible_mask, NULL, 0);
				148
				149	/* Hold the lock until the TLB flush is finished to avoid races. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	150	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	151	}
				152
/*
 * pgd_list holds every pgd so that kernel-mapping changes can be applied
 * to (or invalidated in) both cached and uncached pgd's; pgd_lock
 * protects the list.  This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 *
 * Unlike on x86 and other platforms, the lock is always taken with
 * interrupts disabled, because shatter_huge_page() needs it and may be
 * called from an interrupt context.  The tlbflush IPI deadlock seen on
 * x86 is not a risk here, since the flush_remote() API has the
 * hypervisor perform the TLB flushes regardless of irq disabling.
 */
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
				169
				170	static inline void pgd_list_add(pgd_t *pgd)
				171	{
				172	list_add(pgd_to_list(pgd), &pgd_list);
				173	}
				174
				175	static inline void pgd_list_del(pgd_t *pgd)
				176	{
				177	list_del(pgd_to_list(pgd));
				178	}
				179
				180	#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
				181	#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)
				182
				183	static void pgd_ctor(pgd_t *pgd)
				184	{
				185	unsigned long flags;
				186
				187	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
				188	spin_lock_irqsave(&pgd_lock, flags);
				189
				190	#ifndef __tilegx__
				191	/*
				192	* Check that the user interrupt vector has no L2.
				193	* It never should for the swapper, and new page tables
				194	* should always start with an empty user interrupt vector.
				195	*/
				196	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
				197	#endif
				198
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	199	memcpy(pgd + KERNEL_PGD_INDEX_START,
				200	swapper_pg_dir + KERNEL_PGD_INDEX_START,
				201	KERNEL_PGD_PTRS * sizeof(pgd_t));
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	202
				203	pgd_list_add(pgd);
				204	spin_unlock_irqrestore(&pgd_lock, flags);
				205	}
				206
				207	static void pgd_dtor(pgd_t *pgd)
				208	{
				209	unsigned long flags; /* can be called from interrupt context */
				210
				211	spin_lock_irqsave(&pgd_lock, flags);
				212	pgd_list_del(pgd);
				213	spin_unlock_irqrestore(&pgd_lock, flags);
				214	}
				215
				216	pgd_t pgd_alloc(struct mm_struct mm)
				217	{
				218	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
				219	if (pgd)
				220	pgd_ctor(pgd);
				221	return pgd;
				222	}
				223
				224	void pgd_free(struct mm_struct mm, pgd_t pgd)
				225	{
				226	pgd_dtor(pgd);
				227	kmem_cache_free(pgd_cache, pgd);
				228	}
				229
				230
				231	#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
				232
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	233	struct page pgtable_alloc_one(struct mm_struct mm, unsigned long address,
				234	int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	235	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	236	gfp_t flags = GFP_KERNEL\|__GFP_REPEAT\|__GFP_ZERO;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	237	struct page *p;
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	238	int i;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	239
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	240	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
				241	if (p == NULL)
				242	return NULL;
				243
Kirill A. Shutemov	76b3aec	2013-11-14 14:31:43 -0800	[diff] [blame]	244	if (!pgtable_page_ctor(p)) {
				245	__free_pages(p, L2_USER_PGTABLE_ORDER);
				246	return NULL;
				247	}
				248
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	249	/*
				250	* Make every page have a page_count() of one, not just the first.
				251	* We don't use __GFP_COMP since it doesn't look like it works
				252	* correctly with tlb_remove_page().
				253	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	254	for (i = 1; i < order; ++i) {
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	255	init_page_count(p+i);
				256	inc_zone_page_state(p+i, NR_PAGETABLE);
				257	}
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	258
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	259	return p;
				260	}
				261
				262	/*
				263	* Free page immediately (used in __pte_alloc if we raced with another
				264	* process). We have to correct whatever pte_alloc_one() did before
				265	* returning the pages to the allocator.
				266	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	267	void pgtable_free(struct mm_struct mm, struct page p, int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	268	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	269	int i;
				270
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	271	pgtable_page_dtor(p);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	272	__free_page(p);
				273
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	274	for (i = 1; i < order; ++i) {
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	275	__free_page(p+i);
				276	dec_zone_page_state(p+i, NR_PAGETABLE);
				277	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	278	}
				279
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	280	void __pgtable_free_tlb(struct mmu_gather tlb, struct page pte,
				281	unsigned long address, int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	282	{
				283	int i;
				284
				285	pgtable_page_dtor(pte);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	286	tlb_remove_page(tlb, pte);
				287
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	288	for (i = 1; i < order; ++i) {
Peter Zijlstra	342d87e	2011-01-25 18:31:12 +0100	[diff] [blame]	289	tlb_remove_page(tlb, pte + i);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	290	dec_zone_page_state(pte + i, NR_PAGETABLE);
				291	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	292	}
				293
				294	#ifndef __tilegx__
				295
				296	/*
				297	* FIXME: needs to be atomic vs hypervisor writes. For now we make the
				298	* window of vulnerability a bit smaller by doing an unlocked 8-bit update.
				299	*/
				300	int ptep_test_and_clear_young(struct vm_area_struct *vma,
				301	unsigned long addr, pte_t *ptep)
				302	{
				303	#if HV_PTE_INDEX_ACCESSED < 8 \|\| HV_PTE_INDEX_ACCESSED >= 16
				304	# error Code assumes HV_PTE "accessed" bit in second byte
				305	#endif
				306	u8 tmp = (u8 )ptep;
				307	u8 second_byte = tmp[1];
				308	if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
				309	return 0;
				310	tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
				311	return 1;
				312	}
				313
				314	/*
				315	* This implementation is atomic vs hypervisor writes, since the hypervisor
				316	* always writes the low word (where "accessed" and "dirty" are) and this
				317	* routine only writes the high word.
				318	*/
				319	void ptep_set_wrprotect(struct mm_struct *mm,
				320	unsigned long addr, pte_t *ptep)
				321	{
				322	#if HV_PTE_INDEX_WRITABLE < 32
				323	# error Code assumes HV_PTE "writable" bit in high word
				324	#endif
				325	u32 tmp = (u32 )ptep;
				326	tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
				327	}
				328
				329	#endif
				330
Chris Metcalf	640710a	2013-08-12 15:08:09 -0400	[diff] [blame]	331	/*
				332	* Return a pointer to the PTE that corresponds to the given
				333	* address in the given page table. A NULL page table just uses
				334	* the standard kernel page table; the preferred API in this case
				335	* is virt_to_kpte().
				336	*
				337	* The returned pointer can point to a huge page in other levels
				338	* of the page table than the bottom, if the huge page is present
				339	* in the page table. For bottom-level PTEs, the returned pointer
				340	* can point to a PTE that is either present or not.
				341	*/
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	342	pte_t virt_to_pte(struct mm_struct mm, unsigned long addr)
				343	{
				344	pgd_t *pgd;
				345	pud_t *pud;
				346	pmd_t *pmd;
				347
				348	if (pgd_addr_invalid(addr))
				349	return NULL;
				350
				351	pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
				352	pud = pud_offset(pgd, addr);
				353	if (!pud_present(*pud))
				354	return NULL;
Chris Metcalf	a718e10	2013-08-10 13:15:46 -0400	[diff] [blame]	355	if (pud_huge_page(*pud))
				356	return (pte_t *)pud;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	357	pmd = pmd_offset(pud, addr);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	358	if (!pmd_present(*pmd))
				359	return NULL;
Chris Metcalf	640710a	2013-08-12 15:08:09 -0400	[diff] [blame]	360	if (pmd_huge_page(*pmd))
				361	return (pte_t *)pmd;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	362	return pte_offset_kernel(pmd, addr);
				363	}
Chris Metcalf	a718e10	2013-08-10 13:15:46 -0400	[diff] [blame]	364	EXPORT_SYMBOL(virt_to_pte);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	365
Chris Metcalf	640710a	2013-08-12 15:08:09 -0400	[diff] [blame]	366	pte_t *virt_to_kpte(unsigned long kaddr)
				367	{
				368	BUG_ON(kaddr < PAGE_OFFSET);
				369	return virt_to_pte(NULL, kaddr);
				370	}
				371	EXPORT_SYMBOL(virt_to_kpte);
				372
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	373	pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
				374	{
				375	unsigned int width = smp_width;
				376	int x = cpu % width;
				377	int y = cpu / width;
				378	BUG_ON(y >= smp_height);
				379	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
				380	BUG_ON(cpu < 0 \|\| cpu >= NR_CPUS);
				381	BUG_ON(!cpu_is_valid_lotar(cpu));
				382	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
				383	}
				384
				385	int get_remote_cache_cpu(pgprot_t prot)
				386	{
				387	HV_LOTAR lotar = hv_pte_get_lotar(prot);
				388	int x = HV_LOTAR_X(lotar);
				389	int y = HV_LOTAR_Y(lotar);
				390	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
				391	return x + y * smp_width;
				392	}
				393
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	394	/*
				395	* Convert a kernel VA to a PA and homing information.
				396	*/
				397	int va_to_cpa_and_pte(void va, unsigned long long cpa, pte_t *pte)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	398	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	399	struct page *page = virt_to_page(va);
				400	pte_t null_pte = { 0 };
				401
				402	*cpa = __pa(va);
				403
				404	/* Note that this is not writing a page table, just returning a pte. */
				405	*pte = pte_set_home(null_pte, page_home(page));
				406
				407	return 0; /* return non-zero if not hfh? */
				408	}
				409	EXPORT_SYMBOL(va_to_cpa_and_pte);
				410
				411	void __set_pte(pte_t *ptep, pte_t pte)
				412	{
				413	#ifdef __tilegx__
				414	*ptep = pte;
				415	#else
				416	# if HV_PTE_INDEX_PRESENT >= 32 \|\| HV_PTE_INDEX_MIGRATING >= 32
				417	# error Must write the present and migrating bits last
				418	# endif
				419	if (pte_present(pte)) {
				420	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
				421	barrier();
				422	((u32 *)ptep)[0] = (u32)(pte_val(pte));
				423	} else {
				424	((u32 *)ptep)[0] = (u32)(pte_val(pte));
				425	barrier();
				426	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
				427	}
				428	#endif /* __tilegx__ */
				429	}
				430
				431	void set_pte(pte_t *ptep, pte_t pte)
				432	{
Chris Metcalf	12400f1	2012-03-29 15:36:53 -0400	[diff] [blame]	433	if (pte_present(pte) &&
				434	(!CHIP_HAS_MMIO() \|\| hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
				435	/* The PTE actually references physical memory. */
				436	unsigned long pfn = pte_pfn(pte);
				437	if (pfn_valid(pfn)) {
				438	/* Update the home of the PTE from the struct page. */
				439	pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
				440	} else if (hv_pte_get_mode(pte) == 0) {
				441	/* remap_pfn_range(), etc, must supply PTE mode. */
				442	panic("set_pte(): out-of-range PFN and mode 0\n");
				443	}
				444	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	445
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	446	__set_pte(ptep, pte);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	447	}
				448
				449	/* Can this mm load a PTE with cached_priority set? */
				450	static inline int mm_is_priority_cached(struct mm_struct *mm)
				451	{
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	452	return mm->context.priority_cached != 0;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	453	}
				454
				455	/*
				456	* Add a priority mapping to an mm_context and
				457	* notify the hypervisor if this is the first one.
				458	*/
				459	void start_mm_caching(struct mm_struct *mm)
				460	{
				461	if (!mm_is_priority_cached(mm)) {
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	462	mm->context.priority_cached = -1UL;
				463	hv_set_caching(-1UL);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	464	}
				465	}
				466
				467	/*
				468	* Validate and return the priority_cached flag. We know if it's zero
				469	* that we don't need to scan, since we immediately set it non-zero
				470	* when we first consider a MAP_CACHE_PRIORITY mapping.
				471	*
				472	* We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
				473	* since we're in an interrupt context (servicing switch_mm) we don't
				474	* worry about it and don't unset the "priority_cached" field.
				475	* Presumably we'll come back later and have more luck and clear
				476	* the value then; for now we'll just keep the cache marked for priority.
				477	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	478	static unsigned long update_priority_cached(struct mm_struct *mm)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	479	{
				480	if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
				481	struct vm_area_struct *vm;
				482	for (vm = mm->mmap; vm; vm = vm->vm_next) {
				483	if (hv_pte_get_cached_priority(vm->vm_page_prot))
				484	break;
				485	}
				486	if (vm == NULL)
				487	mm->context.priority_cached = 0;
				488	up_write(&mm->mmap_sem);
				489	}
				490	return mm->context.priority_cached;
				491	}
				492
				493	/* Set caching correctly for an mm that we are switching to. */
				494	void check_mm_caching(struct mm_struct prev, struct mm_struct next)
				495	{
				496	if (!mm_is_priority_cached(next)) {
				497	/*
				498	* If the new mm doesn't use priority caching, just see if we
				499	* need the hv_set_caching(), or can assume it's already zero.
				500	*/
				501	if (mm_is_priority_cached(prev))
				502	hv_set_caching(0);
				503	} else {
				504	hv_set_caching(update_priority_cached(next));
				505	}
				506	}
				507
				508	#if CHIP_HAS_MMIO()
				509
				510	/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
				511	void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				512	pgprot_t home)
				513	{
				514	void *addr;
				515	struct vm_struct *area;
				516	unsigned long offset, last_addr;
				517	pgprot_t pgprot;
				518
				519	/* Don't allow wraparound or zero size */
				520	last_addr = phys_addr + size - 1;
				521	if (!size \|\| last_addr < phys_addr)
				522	return NULL;
				523
				524	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
				525	pgprot = PAGE_KERNEL;
				526	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
				527	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));
				528
				529	/*
				530	* Mappings have to be page-aligned
				531	*/
				532	offset = phys_addr & ~PAGE_MASK;
				533	phys_addr &= PAGE_MASK;
				534	size = PAGE_ALIGN(last_addr+1) - phys_addr;
				535
				536	/*
				537	* Ok, go for it..
				538	*/
				539	area = get_vm_area(size, VM_IOREMAP /* \| other flags? */);
				540	if (!area)
				541	return NULL;
				542	area->phys_addr = phys_addr;
				543	addr = area->addr;
				544	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
				545	phys_addr, pgprot)) {
Chris Metcalf	fad052d	2013-08-07 15:42:34 -0400	[diff] [blame]	546	free_vm_area(area);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	547	return NULL;
				548	}
				549	return (__force void __iomem ) (offset + (char )addr);
				550	}
				551	EXPORT_SYMBOL(ioremap_prot);
				552
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	553	/* Unmap an MMIO VA mapping. */
				554	void iounmap(volatile void __iomem *addr_in)
				555	{
				556	volatile void __iomem addr = (volatile void __iomem )
				557	(PAGE_MASK & (unsigned long __force)addr_in);
				558	#if 1
				559	vunmap((void * __force)addr);
				560	#else
				561	/* x86 uses this complicated flow instead of vunmap(). Is
				562	* there any particular reason we should do the same? */
				563	struct vm_struct p, o;
				564
				565	/* Use the vm area unlocked, assuming the caller
				566	ensures there isn't another iounmap for the same address
				567	in parallel. Reuse of the virtual address is prevented by
				568	leaving it in the global lists until we're done with it.
				569	cpa takes care of the direct mappings. */
Joonsoo Kim	ef93247	2013-04-29 15:07:27 -0700	[diff] [blame]	570	p = find_vm_area((void *)addr);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	571
				572	if (!p) {
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	573	pr_err("iounmap: bad address %p\n", addr);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	574	dump_stack();
				575	return;
				576	}
				577
				578	/* Finally remove it */
				579	o = remove_vm_area((void *)addr);
				580	BUG_ON(p != o \|\| o == NULL);
				581	kfree(p);
				582	#endif
				583	}
				584	EXPORT_SYMBOL(iounmap);
				585
				586	#endif /* CHIP_HAS_MMIO() */