/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

#define K(x) ((x) << (PAGE_SHIFT-10))

/*
 * The normal show_free_areas() is too verbose on Tile, with dozens
 * of processors and often four NUMA zones each with high and lowmem.
 */
void show_mem(unsigned int filter)
{
        struct zone *zone;

        pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
               " free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
               " pagecache:%lu swap:%lu\n",
               (global_page_state(NR_ACTIVE_ANON) +
                global_page_state(NR_ACTIVE_FILE)),
               (global_page_state(NR_INACTIVE_ANON) +
                global_page_state(NR_INACTIVE_FILE)),
               global_page_state(NR_FILE_DIRTY),
               global_page_state(NR_WRITEBACK),
               global_page_state(NR_UNSTABLE_NFS),
               global_page_state(NR_FREE_PAGES),
               (global_page_state(NR_SLAB_RECLAIMABLE) +
                global_page_state(NR_SLAB_UNRECLAIMABLE)),
               global_page_state(NR_FILE_MAPPED),
               global_page_state(NR_PAGETABLE),
               global_page_state(NR_BOUNCE),
               global_page_state(NR_FILE_PAGES),
               nr_swap_pages);

        for_each_zone(zone) {
                unsigned long flags, order, total = 0, largest_order = -1;

                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                for (order = 0; order < MAX_ORDER; order++) {
                        int nr = zone->free_area[order].nr_free;
                        total += nr << order;
                        if (nr)
                                largest_order = order;
                }
                spin_unlock_irqrestore(&zone->lock, flags);
                pr_err("Node %d %7s: %lukB (largest %lukB)\n",
                       zone_to_nid(zone), zone->name,
                       K(total), largest_order ? K(1UL) << largest_order : 0);
        }
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        /* <pfn,flags> stored as-is, to permit clearing entries */
        set_pte(pte, pfn_pte(pfn, flags));

        /*
         * It's enough to flush this one mapping.
         * This appears conservative since it is only called
         * from __set_fixmap.
         */
        local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
}

void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
}
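
/*
 * Example (illustrative sketch only; not a caller in this file): fixmap
 * slots are normally installed through the generic set_fixmap() and
 * clear_fixmap() wrappers, which on architectures that provide them
 * expand to __set_fixmap() with PAGE_KERNEL or an empty pgprot.
 * FIX_KMAP_BEGIN is used here purely as an example index:
 *
 *      set_fixmap(FIX_KMAP_BEGIN, page_to_phys(page));
 *      ...
 *      clear_fixmap(FIX_KMAP_BEGIN);
 */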

#if defined(CONFIG_HIGHPTE)
pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
{
        pte_t *pte = kmap_atomic(pmd_page(*dir)) +
                ((pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK);
        return &pte[pte_index(address)];
}
#endif

/**
 * shatter_huge_page() - ensure a given address is mapped by a small page.
 *
 * This function converts a huge PTE mapping kernel LOWMEM into a bunch
 * of small PTEs with the same caching. No cache flush required, but we
 * must do a global TLB flush.
 *
 * Any caller that wishes to modify a kernel mapping that might
 * have been made with a huge page should call this function,
 * since doing so properly avoids race conditions with installing the
 * newly-shattered page and then flushing all the TLB entries.
 *
 * @addr: Address at which to shatter any existing huge page.
 */
void shatter_huge_page(unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        unsigned long flags = 0;  /* happy compiler */
#ifdef __PAGETABLE_PMD_FOLDED
        struct list_head *pos;
#endif

        /* Get a pointer to the pmd entry that we need to change. */
        addr &= HPAGE_MASK;
        BUG_ON(pgd_addr_invalid(addr));
        BUG_ON(addr < PAGE_OFFSET);  /* only for kernel LOWMEM */
        pgd = swapper_pg_dir + pgd_index(addr);
        pud = pud_offset(pgd, addr);
        BUG_ON(!pud_present(*pud));
        pmd = pmd_offset(pud, addr);
        BUG_ON(!pmd_present(*pmd));
        if (!pmd_huge_page(*pmd))
                return;

        spin_lock_irqsave(&init_mm.page_table_lock, flags);
        if (!pmd_huge_page(*pmd)) {
                /* Lost the race to convert the huge page. */
                spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
                return;
        }

        /* Shatter the huge page into the preallocated L2 page table. */
        pmd_populate_kernel(&init_mm, pmd,
                            get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));

#ifdef __PAGETABLE_PMD_FOLDED
        /* Walk every pgd on the system and update the pmd there. */
        spin_lock(&pgd_lock);
        list_for_each(pos, &pgd_list) {
                pmd_t *copy_pmd;
                pgd = list_to_pgd(pos) + pgd_index(addr);
                pud = pud_offset(pgd, addr);
                copy_pmd = pmd_offset(pud, addr);
                __set_pmd(copy_pmd, *pmd);
        }
        spin_unlock(&pgd_lock);
#endif

        /* Tell every cpu to notice the change. */
        flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
                     cpu_possible_mask, NULL, 0);

        /* Hold the lock until the TLB flush is finished to avoid races. */
        spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
}
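
/*
 * Example (illustrative sketch only; "new_prot" is a hypothetical
 * pgprot_t and no such caller exists in this file): code that wants to
 * rewrite one small kernel LOWMEM PTE that might currently be covered
 * by a huge mapping would shatter first, then update and flush:
 *
 *      shatter_huge_page((unsigned long)addr);
 *      pte = virt_to_pte(NULL, (unsigned long)addr);
 *      set_pte(pte, pte_modify(*pte, new_prot));
 *      flush_tlb_kernel_range((unsigned long)addr,
 *                             (unsigned long)addr + PAGE_SIZE);
 */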

/*
 * List of all pgd's needed so it can invalidate entries in both cached
 * and uncached pgd's. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 *
 * The lock is always taken with interrupts disabled, unlike on x86
 * and other platforms, because we need to take the lock in
 * shatter_huge_page(), which may be called from an interrupt context.
 * We are not at risk from the tlbflush IPI deadlock that was seen on
 * x86, since we use the flush_remote() API to have the hypervisor do
 * the TLB flushes regardless of irq disabling.
 */
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

static inline void pgd_list_add(pgd_t *pgd)
{
        list_add(pgd_to_list(pgd), &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
        list_del(pgd_to_list(pgd));
}

#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)

static void pgd_ctor(pgd_t *pgd)
{
        unsigned long flags;

        memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
        spin_lock_irqsave(&pgd_lock, flags);

#ifndef __tilegx__
        /*
         * Check that the user interrupt vector has no L2.
         * It never should for the swapper, and new page tables
         * should always start with an empty user interrupt vector.
         */
        BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
#endif

        memcpy(pgd + KERNEL_PGD_INDEX_START,
               swapper_pg_dir + KERNEL_PGD_INDEX_START,
               KERNEL_PGD_PTRS * sizeof(pgd_t));

        pgd_list_add(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

static void pgd_dtor(pgd_t *pgd)
{
        unsigned long flags; /* can be called from interrupt context */

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
        pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
        if (pgd)
                pgd_ctor(pgd);
        return pgd;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
        pgd_dtor(pgd);
        kmem_cache_free(pgd_cache, pgd);
}


#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)

struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
                               int order)
{
        gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
        struct page *p;
        int i;

#ifdef CONFIG_HIGHPTE
        flags |= __GFP_HIGHMEM;
#endif

        p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
        if (p == NULL)
                return NULL;

        /*
         * Make every page have a page_count() of one, not just the first.
         * We don't use __GFP_COMP since it doesn't look like it works
         * correctly with tlb_remove_page().
         */
        for (i = 1; i < order; ++i) {
                init_page_count(p+i);
                inc_zone_page_state(p+i, NR_PAGETABLE);
        }

        pgtable_page_ctor(p);
        return p;
}

/*
 * Free page immediately (used in __pte_alloc if we raced with another
 * process). We have to correct whatever pte_alloc_one() did before
 * returning the pages to the allocator.
 */
void pgtable_free(struct mm_struct *mm, struct page *p, int order)
{
        int i;

        pgtable_page_dtor(p);
        __free_page(p);

        for (i = 1; i < order; ++i) {
                __free_page(p+i);
                dec_zone_page_state(p+i, NR_PAGETABLE);
        }
}

void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
                        unsigned long address, int order)
{
        int i;

        pgtable_page_dtor(pte);
        tlb_remove_page(tlb, pte);

        for (i = 1; i < order; ++i) {
                tlb_remove_page(tlb, pte + i);
                dec_zone_page_state(pte + i, NR_PAGETABLE);
        }
}

#ifndef __tilegx__

/*
 * FIXME: needs to be atomic vs hypervisor writes. For now we make the
 * window of vulnerability a bit smaller by doing an unlocked 8-bit update.
 */
int ptep_test_and_clear_young(struct vm_area_struct *vma,
                              unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16
# error Code assumes HV_PTE "accessed" bit in second byte
#endif
        u8 *tmp = (u8 *)ptep;
        u8 second_byte = tmp[1];
        if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
                return 0;
        tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
        return 1;
}
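
/*
 * Worked example (illustrative): if HV_PTE_INDEX_ACCESSED were 9, the
 * "accessed" bit would be bit 1 of byte 1 of the PTE, so the code above
 * would test tmp[1] against (1 << (9 - 8)) == 0x2 and clear it with a
 * single one-byte store. Only a hypervisor update to that same byte can
 * be lost in the race; the other bytes of the PTE are never rewritten,
 * which is what keeps the window of vulnerability small.
 */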

/*
 * This implementation is atomic vs hypervisor writes, since the hypervisor
 * always writes the low word (where "accessed" and "dirty" are) and this
 * routine only writes the high word.
 */
void ptep_set_wrprotect(struct mm_struct *mm,
                        unsigned long addr, pte_t *ptep)
{
#if HV_PTE_INDEX_WRITABLE < 32
# error Code assumes HV_PTE "writable" bit in high word
#endif
        u32 *tmp = (u32 *)ptep;
        tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
}

#endif

pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_addr_invalid(addr))
                return NULL;

        pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
        pud = pud_offset(pgd, addr);
        if (!pud_present(*pud))
                return NULL;
        pmd = pmd_offset(pud, addr);
        if (pmd_huge_page(*pmd))
                return (pte_t *)pmd;
        if (!pmd_present(*pmd))
                return NULL;
        return pte_offset_kernel(pmd, addr);
}

pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
{
        unsigned int width = smp_width;
        int x = cpu % width;
        int y = cpu / width;
        BUG_ON(y >= smp_height);
        BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
        BUG_ON(cpu < 0 || cpu >= NR_CPUS);
        BUG_ON(!cpu_is_valid_lotar(cpu));
        return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
}

int get_remote_cache_cpu(pgprot_t prot)
{
        HV_LOTAR lotar = hv_pte_get_lotar(prot);
        int x = HV_LOTAR_X(lotar);
        int y = HV_LOTAR_Y(lotar);
        BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
        return x + y * smp_width;
}

/*
 * Convert a kernel VA to a PA and homing information.
 */
int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
{
        struct page *page = virt_to_page(va);
        pte_t null_pte = { 0 };

        *cpa = __pa(va);

        /* Note that this is not writing a page table, just returning a pte. */
        *pte = pte_set_home(null_pte, page_home(page));

        return 0; /* return non-zero if not hfh? */
}
EXPORT_SYMBOL(va_to_cpa_and_pte);
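
/*
 * Example (illustrative only; "buf" is a hypothetical kernel buffer):
 * a driver handing memory to the hypervisor or to a device that needs
 * the client physical address plus homing bits might do
 *
 *      unsigned long long cpa;
 *      pte_t pte;
 *      va_to_cpa_and_pte(buf, &cpa, &pte);
 *
 * and then pass "cpa" together with the caching/homing fields of "pte"
 * to the consumer.
 */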

void __set_pte(pte_t *ptep, pte_t pte)
{
#ifdef __tilegx__
        *ptep = pte;
#else
# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
#  error Must write the present and migrating bits last
# endif
        if (pte_present(pte)) {
                ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
                barrier();
                ((u32 *)ptep)[0] = (u32)(pte_val(pte));
        } else {
                ((u32 *)ptep)[0] = (u32)(pte_val(pte));
                barrier();
                ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
        }
#endif /* __tilegx__ */
}

void set_pte(pte_t *ptep, pte_t pte)
{
        if (pte_present(pte) &&
            (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
                /* The PTE actually references physical memory. */
                unsigned long pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        /* Update the home of the PTE from the struct page. */
                        pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
                } else if (hv_pte_get_mode(pte) == 0) {
                        /* remap_pfn_range(), etc, must supply PTE mode. */
                        panic("set_pte(): out-of-range PFN and mode 0\n");
                }
        }

        __set_pte(ptep, pte);
}

/* Can this mm load a PTE with cached_priority set? */
static inline int mm_is_priority_cached(struct mm_struct *mm)
{
        return mm->context.priority_cached != 0;
}

/*
 * Add a priority mapping to an mm_context and
 * notify the hypervisor if this is the first one.
 */
void start_mm_caching(struct mm_struct *mm)
{
        if (!mm_is_priority_cached(mm)) {
                mm->context.priority_cached = -1UL;
                hv_set_caching(-1UL);
        }
}

/*
 * Validate and return the priority_cached flag. We know if it's zero
 * that we don't need to scan, since we immediately set it non-zero
 * when we first consider a MAP_CACHE_PRIORITY mapping.
 *
 * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
 * since we're in an interrupt context (servicing switch_mm) we don't
 * worry about it and don't unset the "priority_cached" field.
 * Presumably we'll come back later and have more luck and clear
 * the value then; for now we'll just keep the cache marked for priority.
 */
static unsigned long update_priority_cached(struct mm_struct *mm)
{
        if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
                struct vm_area_struct *vm;
                for (vm = mm->mmap; vm; vm = vm->vm_next) {
                        if (hv_pte_get_cached_priority(vm->vm_page_prot))
                                break;
                }
                if (vm == NULL)
                        mm->context.priority_cached = 0;
                up_write(&mm->mmap_sem);
        }
        return mm->context.priority_cached;
}

/* Set caching correctly for an mm that we are switching to. */
void check_mm_caching(struct mm_struct *prev, struct mm_struct *next)
{
        if (!mm_is_priority_cached(next)) {
                /*
                 * If the new mm doesn't use priority caching, just see if we
                 * need the hv_set_caching(), or can assume it's already zero.
                 */
                if (mm_is_priority_cached(prev))
                        hv_set_caching(0);
        } else {
                hv_set_caching(update_priority_cached(next));
        }
}

#if CHIP_HAS_MMIO()

/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
                           pgprot_t home)
{
        void *addr;
        struct vm_struct *area;
        unsigned long offset, last_addr;
        pgprot_t pgprot;

        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr)
                return NULL;

        /* Create a read/write, MMIO VA mapping homed at the requested shim. */
        pgprot = PAGE_KERNEL;
        pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
        pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));

        /*
         * Mappings have to be page-aligned
         */
        offset = phys_addr & ~PAGE_MASK;
        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(last_addr+1) - phys_addr;

        /*
         * Ok, go for it..
         */
        area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
        if (!area)
                return NULL;
        area->phys_addr = phys_addr;
        addr = area->addr;
        if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
                               phys_addr, pgprot)) {
                remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
                return NULL;
        }
        return (__force void __iomem *) (offset + (char *)addr);
}
EXPORT_SYMBOL(ioremap_prot);
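
/*
 * Example (illustrative sketch; REG_OFFSET and the "home" value are
 * hypothetical): mapping a shim's register space and accessing it with
 * the usual MMIO accessors might look like
 *
 *      void __iomem *regs = ioremap_prot(phys, size, home);
 *      if (regs) {
 *              u32 v = readl(regs + REG_OFFSET);
 *              ...
 *              iounmap(regs);
 *      }
 */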

/* Map a PCI MMIO bus address into VA space. */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
        panic("ioremap for PCI MMIO is not supported");
}
EXPORT_SYMBOL(ioremap);

/* Unmap an MMIO VA mapping. */
void iounmap(volatile void __iomem *addr_in)
{
        volatile void __iomem *addr = (volatile void __iomem *)
                (PAGE_MASK & (unsigned long __force)addr_in);
#if 1
        vunmap((void * __force)addr);
#else
        /* x86 uses this complicated flow instead of vunmap(). Is
         * there any particular reason we should do the same? */
        struct vm_struct *p, *o;

        /* Use the vm area unlocked, assuming the caller
           ensures there isn't another iounmap for the same address
           in parallel. Reuse of the virtual address is prevented by
           leaving it in the global lists until we're done with it.
           cpa takes care of the direct mappings. */
        read_lock(&vmlist_lock);
        for (p = vmlist; p; p = p->next) {
                if (p->addr == addr)
                        break;
        }
        read_unlock(&vmlist_lock);

        if (!p) {
                pr_err("iounmap: bad address %p\n", addr);
                dump_stack();
                return;
        }

        /* Finally remove it */
        o = remove_vm_area((void *)addr);
        BUG_ON(p != o || o == NULL);
        kfree(p);
#endif
}
EXPORT_SYMBOL(iounmap);

#endif /* CHIP_HAS_MMIO() */