/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

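/* Convert a count of pages into a size in kilobytes. */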
#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * The normal show_free_areas() is too verbose on Tile, with dozens
 * of processors and often four NUMA zones each with high and lowmem.
 */
void show_mem(unsigned int filter)
{
	struct zone *zone;

	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
	       " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
	       " pagecache:%lu swap:%lu\n",
	       (global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE)),
	       (global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE)),
	       global_page_state(NR_FILE_DIRTY),
	       global_page_state(NR_WRITEBACK),
	       global_page_state(NR_UNSTABLE_NFS),
	       global_page_state(NR_FREE_PAGES),
	       (global_page_state(NR_SLAB_RECLAIMABLE) +
		global_page_state(NR_SLAB_UNRECLAIMABLE)),
	       global_page_state(NR_FILE_MAPPED),
	       global_page_state(NR_PAGETABLE),
	       global_page_state(NR_BOUNCE),
	       global_page_state(NR_FILE_PAGES),
	       get_nr_swap_pages());

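	/*
	 * Per-zone summary: total free memory and the size of the largest
	 * free block, read from the buddy free lists under the zone lock.
	 */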
	for_each_zone(zone) {
		unsigned long flags, order, total = 0, largest_order = -1;

		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		for (order = 0; order < MAX_ORDER; order++) {
			int nr = zone->free_area[order].nr_free;
			total += nr << order;
			if (nr)
				largest_order = order;
		}
		spin_unlock_irqrestore(&zone->lock, flags);
		pr_err("Node %d %7s: %lukB (largest %lukB)\n",
		       zone_to_nid(zone), zone->name,
		       K(total), largest_order ? K(1UL) << largest_order : 0);
	}
}

/**
 * shatter_huge_page() - ensure a given address is mapped by a small page.
 *
 * This function converts a huge PTE mapping kernel LOWMEM into a bunch
 * of small PTEs with the same caching.  No cache flush required, but we
 * must do a global TLB flush.
 *
 * Any caller that wishes to modify a kernel mapping that might
 * have been made with a huge page should call this function,
 * since doing so properly avoids race conditions with installing the
 * newly-shattered page and then flushing all the TLB entries.
 *
 * @addr: Address at which to shatter any existing huge page.
 */
void shatter_huge_page(unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long flags = 0;  /* happy compiler */
#ifdef __PAGETABLE_PMD_FOLDED
	struct list_head *pos;
#endif

	/* Get a pointer to the pmd entry that we need to change. */
	addr &= HPAGE_MASK;
	BUG_ON(pgd_addr_invalid(addr));
	BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
	pgd = swapper_pg_dir + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	BUG_ON(!pud_present(*pud));
	pmd = pmd_offset(pud, addr);
	BUG_ON(!pmd_present(*pmd));
	if (!pmd_huge_page(*pmd))
		return;

	spin_lock_irqsave(&init_mm.page_table_lock, flags);
	if (!pmd_huge_page(*pmd)) {
		/* Lost the race to convert the huge page. */
		spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
		return;
	}

	/* Shatter the huge page into the preallocated L2 page table. */
	pmd_populate_kernel(&init_mm, pmd,
			    get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));

#ifdef __PAGETABLE_PMD_FOLDED
	/* Walk every pgd on the system and update the pmd there. */
	spin_lock(&pgd_lock);
	list_for_each(pos, &pgd_list) {
		pmd_t *copy_pmd;
		pgd = list_to_pgd(pos) + pgd_index(addr);
		pud = pud_offset(pgd, addr);
		copy_pmd = pmd_offset(pud, addr);
		__set_pmd(copy_pmd, *pmd);
	}
	spin_unlock(&pgd_lock);
#endif

	/* Tell every cpu to notice the change. */
	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
		     cpu_possible_mask, NULL, 0);

	/* Hold the lock until the TLB flush is finished to avoid races. */
	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
}
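
/*
 * A minimal, hypothetical sketch of the calling pattern described above
 * (not a copy of any in-tree caller): before rewriting a kernel PTE that
 * may be covered by a huge mapping, shatter it so a small-page PTE
 * exists to modify:
 *
 *	unsigned long va = (unsigned long)page_address(page);
 *	pte_t *ptep;
 *
 *	shatter_huge_page(va);
 *	ptep = virt_to_pte(NULL, va);
 *	set_pte(ptep, pte_set_home(*ptep, new_home));
 *
 * "page" and "new_home" are illustrative names only; virt_to_pte(),
 * set_pte() and pte_set_home() are the helpers used elsewhere in this
 * file.
 */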

/*
 * List of all pgd's needed so it can invalidate entries in both cached
 * and uncached pgd's. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 *
 * The lock is always taken with interrupts disabled, unlike on x86
 * and other platforms, because we need to take the lock in
 * shatter_huge_page(), which may be called from an interrupt context.
 * We are not at risk from the tlbflush IPI deadlock that was seen on
 * x86, since we use the flush_remote() API to have the hypervisor do
 * the TLB flushes regardless of irq disabling.
 */
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

static inline void pgd_list_add(pgd_t *pgd)
{
	list_add(pgd_to_list(pgd), &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	list_del(pgd_to_list(pgd));
}

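/*
 * The kernel's part of each pgd begins at the entry mapping PAGE_OFFSET;
 * pgd_ctor() copies those KERNEL_PGD_PTRS entries from swapper_pg_dir so
 * that every new mm shares the kernel mappings.
 */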
#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)

static void pgd_ctor(pgd_t *pgd)
{
	unsigned long flags;

	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
	spin_lock_irqsave(&pgd_lock, flags);

#ifndef __tilegx__
	/*
	 * Check that the user interrupt vector has no L2.
	 * It never should for the swapper, and new page tables
	 * should always start with an empty user interrupt vector.
	 */
	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
#endif

	memcpy(pgd + KERNEL_PGD_INDEX_START,
	       swapper_pg_dir + KERNEL_PGD_INDEX_START,
	       KERNEL_PGD_PTRS * sizeof(pgd_t));

	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

static void pgd_dtor(pgd_t *pgd)
{
	unsigned long flags; /* can be called from interrupt context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
	if (pgd)
		pgd_ctor(pgd);
	return pgd;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_dtor(pgd);
	kmem_cache_free(pgd_cache, pgd);
}


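/* Number of pages in each user L2 page table allocation. */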
#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)

struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
			       int order)
{
	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
	struct page *p;
	int i;

	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
	if (p == NULL)
		return NULL;

	/*
	 * Make every page have a page_count() of one, not just the first.
	 * We don't use __GFP_COMP since it doesn't look like it works
	 * correctly with tlb_remove_page().
	 */
	for (i = 1; i < order; ++i) {
		init_page_count(p+i);
		inc_zone_page_state(p+i, NR_PAGETABLE);
	}

	pgtable_page_ctor(p);
	return p;
}

/*
 * Free page immediately (used in __pte_alloc if we raced with another
 * process).  We have to correct whatever pte_alloc_one() did before
 * returning the pages to the allocator.
 */
void pgtable_free(struct mm_struct *mm, struct page *p, int order)
{
	int i;

	pgtable_page_dtor(p);
	__free_page(p);

	for (i = 1; i < order; ++i) {
		__free_page(p+i);
		dec_zone_page_state(p+i, NR_PAGETABLE);
	}
}

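/*
 * Like pgtable_free(), but the pages are handed to the mmu_gather, so
 * they are only returned to the allocator after the batched TLB flush;
 * no cpu can still be using a stale translation of the freed page table.
 */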
void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
			unsigned long address, int order)
{
	int i;

	pgtable_page_dtor(pte);
	tlb_remove_page(tlb, pte);

	for (i = 1; i < order; ++i) {
		tlb_remove_page(tlb, pte + i);
		dec_zone_page_state(pte + i, NR_PAGETABLE);
	}
}

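/*
 * On 32-bit tilepro the 64-bit hypervisor PTE cannot be updated
 * atomically, so the helpers below touch only the byte or word that
 * holds the bit they care about.
 */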
#ifndef __tilegx__

/*
 * FIXME: needs to be atomic vs hypervisor writes.  For now we make the
 * window of vulnerability a bit smaller by doing an unlocked 8-bit update.
 */
int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16
# error Code assumes HV_PTE "accessed" bit in second byte
#endif
	u8 *tmp = (u8 *)ptep;
	u8 second_byte = tmp[1];
	if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
		return 0;
	tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
	return 1;
}

/*
 * This implementation is atomic vs hypervisor writes, since the hypervisor
 * always writes the low word (where "accessed" and "dirty" are) and this
 * routine only writes the high word.
 */
void ptep_set_wrprotect(struct mm_struct *mm,
			unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_WRITABLE < 32
# error Code assumes HV_PTE "writable" bit in high word
#endif
	u32 *tmp = (u32 *)ptep;
	tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
}

#endif

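/*
 * Look up the PTE for a kernel (mm == NULL) or user virtual address,
 * returning NULL if nothing maps it.  If the address is covered by a
 * huge page, the returned pointer is really the pmd entry, so callers
 * must be prepared for either level.
 */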
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_addr_invalid(addr))
		return NULL;

	pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_huge_page(*pmd))
		return (pte_t *)pmd;
	if (!pmd_present(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);
}

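/*
 * Home a TILE_L3-cached mapping on a specific cpu by encoding that cpu's
 * (x, y) tile coordinates into the lotar field of the pgprot;
 * get_remote_cache_cpu() below performs the inverse.
 */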
pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
{
	unsigned int width = smp_width;
	int x = cpu % width;
	int y = cpu / width;
	BUG_ON(y >= smp_height);
	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
	BUG_ON(cpu < 0 || cpu >= NR_CPUS);
	BUG_ON(!cpu_is_valid_lotar(cpu));
	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
}

int get_remote_cache_cpu(pgprot_t prot)
{
	HV_LOTAR lotar = hv_pte_get_lotar(prot);
	int x = HV_LOTAR_X(lotar);
	int y = HV_LOTAR_Y(lotar);
	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
	return x + y * smp_width;
}

/*
 * Convert a kernel VA to a PA and homing information.
 */
int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
{
	struct page *page = virt_to_page(va);
	pte_t null_pte = { 0 };

	*cpa = __pa(va);

	/* Note that this is not writing a page table, just returning a pte. */
	*pte = pte_set_home(null_pte, page_home(page));

	return 0; /* return non-zero if not hfh? */
}
EXPORT_SYMBOL(va_to_cpa_and_pte);

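/*
 * On 32-bit tilepro a PTE is two words and cannot be stored atomically.
 * Order the two stores so that the word holding the "present" and
 * "migrating" bits is written last when installing a valid PTE and
 * first when installing a non-present one, so a racing reader never
 * sees a half-written entry that looks valid.
 */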
void __set_pte(pte_t *ptep, pte_t pte)
{
#ifdef __tilegx__
	*ptep = pte;
#else
# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
#  error Must write the present and migrating bits last
# endif
	if (pte_present(pte)) {
		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
		barrier();
		((u32 *)ptep)[0] = (u32)(pte_val(pte));
	} else {
		((u32 *)ptep)[0] = (u32)(pte_val(pte));
		barrier();
		((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
	}
#endif /* __tilegx__ */
}

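/*
 * Install a PTE, first fixing up the caching home of any PTE that maps
 * real memory from its struct page; __set_pte() above is the raw store.
 */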
void set_pte(pte_t *ptep, pte_t pte)
{
	if (pte_present(pte) &&
	    (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
		/* The PTE actually references physical memory. */
		unsigned long pfn = pte_pfn(pte);
		if (pfn_valid(pfn)) {
			/* Update the home of the PTE from the struct page. */
			pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
		} else if (hv_pte_get_mode(pte) == 0) {
			/* remap_pfn_range(), etc, must supply PTE mode. */
			panic("set_pte(): out-of-range PFN and mode 0\n");
		}
	}

	__set_pte(ptep, pte);
}

/* Can this mm load a PTE with cached_priority set? */
static inline int mm_is_priority_cached(struct mm_struct *mm)
{
	return mm->context.priority_cached != 0;
}

/*
 * Add a priority mapping to an mm_context and
 * notify the hypervisor if this is the first one.
 */
void start_mm_caching(struct mm_struct *mm)
{
	if (!mm_is_priority_cached(mm)) {
		mm->context.priority_cached = -1UL;
		hv_set_caching(-1UL);
	}
}

/*
 * Validate and return the priority_cached flag.  We know if it's zero
 * that we don't need to scan, since we immediately set it non-zero
 * when we first consider a MAP_CACHE_PRIORITY mapping.
 *
 * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
 * since we're in an interrupt context (servicing switch_mm) we don't
 * worry about it and don't unset the "priority_cached" field.
 * Presumably we'll come back later and have more luck and clear
 * the value then; for now we'll just keep the cache marked for priority.
 */
static unsigned long update_priority_cached(struct mm_struct *mm)
{
	if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
		struct vm_area_struct *vm;
		for (vm = mm->mmap; vm; vm = vm->vm_next) {
			if (hv_pte_get_cached_priority(vm->vm_page_prot))
				break;
		}
		if (vm == NULL)
			mm->context.priority_cached = 0;
		up_write(&mm->mmap_sem);
	}
	return mm->context.priority_cached;
}

/* Set caching correctly for an mm that we are switching to. */
void check_mm_caching(struct mm_struct *prev, struct mm_struct *next)
{
	if (!mm_is_priority_cached(next)) {
		/*
		 * If the new mm doesn't use priority caching, just see if we
		 * need the hv_set_caching(), or can assume it's already zero.
		 */
		if (mm_is_priority_cached(prev))
			hv_set_caching(0);
	} else {
		hv_set_caching(update_priority_cached(next));
	}
}

#if CHIP_HAS_MMIO()

/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
			   pgprot_t home)
{
	void *addr;
	struct vm_struct *area;
	unsigned long offset, last_addr;
	pgprot_t pgprot;

	/* Don't allow wraparound or zero size */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
	pgprot = PAGE_KERNEL;
	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));

	/*
	 * Mappings have to be page-aligned
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr+1) - phys_addr;

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	addr = area->addr;
	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
			       phys_addr, pgprot)) {
		free_vm_area(area);
		return NULL;
	}
	return (__force void __iomem *) (offset + (char *)addr);
}
EXPORT_SYMBOL(ioremap_prot);

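/*
 * A hypothetical example of how a driver might use ioremap_prot() to map
 * device registers with caching homed according to a device-supplied
 * pgprot (the names bar_phys, bar_len, dev_home and REG_STATUS are
 * illustrative only, not a real API):
 *
 *	void __iomem *regs = ioremap_prot(bar_phys, bar_len, dev_home);
 *	if (regs) {
 *		u32 status = readl(regs + REG_STATUS);
 *		...
 *		iounmap(regs);
 *	}
 */
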
/* Unmap an MMIO VA mapping. */
void iounmap(volatile void __iomem *addr_in)
{
	volatile void __iomem *addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr_in);
#if 1
	vunmap((void * __force)addr);
#else
	/* x86 uses this complicated flow instead of vunmap().  Is
	 * there any particular reason we should do the same? */
	struct vm_struct *p, *o;

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	p = find_vm_area((void *)addr);

	if (!p) {
		pr_err("iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	/* Finally remove it */
	o = remove_vm_area((void *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
#endif
}
EXPORT_SYMBOL(iounmap);

#endif /* CHIP_HAS_MMIO() */