Blame - arch/tile/mm/pgtable.c - kernel/msm-4.9

blob: de0de0c0e8a19946bcc6c0f14c486776d8091097 [file] [log] [blame]

Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	1	/*
				2	* Copyright 2010 Tilera Corporation. All Rights Reserved.
				3	*
				4	* This program is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU General Public License
				6	* as published by the Free Software Foundation, version 2.
				7	*
				8	* This program is distributed in the hope that it will be useful, but
				9	* WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
				11	* NON INFRINGEMENT. See the GNU General Public License for
				12	* more details.
				13	*/
				14
				15	#include <linux/sched.h>
				16	#include <linux/kernel.h>
				17	#include <linux/errno.h>
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	20	#include <linux/highmem.h>
				21	#include <linux/slab.h>
				22	#include <linux/pagemap.h>
				23	#include <linux/spinlock.h>
				24	#include <linux/cpumask.h>
				25	#include <linux/module.h>
				26	#include <linux/io.h>
				27	#include <linux/vmalloc.h>
				28	#include <linux/smp.h>
				29
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	30	#include <asm/pgtable.h>
				31	#include <asm/pgalloc.h>
				32	#include <asm/fixmap.h>
				33	#include <asm/tlb.h>
				34	#include <asm/tlbflush.h>
				35	#include <asm/homecache.h>
				36
				37	#define K(x) ((x) << (PAGE_SHIFT-10))
				38
				39	/*
				40	* The normal show_free_areas() is too verbose on Tile, with dozens
				41	* of processors and often four NUMA zones each with high and lowmem.
				42	*/
David Rientjes	b2b755b	2011-03-24 15:18:15 -0700	[diff] [blame]	43	void show_mem(unsigned int filter)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	44	{
				45	struct zone *zone;
				46
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	47	pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu"
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	48	" free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu"
				49	" pagecache:%lu swap:%lu\n",
				50	(global_page_state(NR_ACTIVE_ANON) +
				51	global_page_state(NR_ACTIVE_FILE)),
				52	(global_page_state(NR_INACTIVE_ANON) +
				53	global_page_state(NR_INACTIVE_FILE)),
				54	global_page_state(NR_FILE_DIRTY),
				55	global_page_state(NR_WRITEBACK),
				56	global_page_state(NR_UNSTABLE_NFS),
				57	global_page_state(NR_FREE_PAGES),
				58	(global_page_state(NR_SLAB_RECLAIMABLE) +
				59	global_page_state(NR_SLAB_UNRECLAIMABLE)),
				60	global_page_state(NR_FILE_MAPPED),
				61	global_page_state(NR_PAGETABLE),
				62	global_page_state(NR_BOUNCE),
				63	global_page_state(NR_FILE_PAGES),
				64	nr_swap_pages);
				65
				66	for_each_zone(zone) {
				67	unsigned long flags, order, total = 0, largest_order = -1;
				68
				69	if (!populated_zone(zone))
				70	continue;
				71
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	72	spin_lock_irqsave(&zone->lock, flags);
				73	for (order = 0; order < MAX_ORDER; order++) {
				74	int nr = zone->free_area[order].nr_free;
				75	total += nr << order;
				76	if (nr)
				77	largest_order = order;
				78	}
				79	spin_unlock_irqrestore(&zone->lock, flags);
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	80	pr_err("Node %d %7s: %lukB (largest %luKb)\n",
				81	zone_to_nid(zone), zone->name,
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	82	K(total), largest_order ? K(1UL) << largest_order : 0);
				83	}
				84	}
				85
				86	/*
				87	* Associate a virtual page frame with a given physical page frame
				88	* and protection flags for that frame.
				89	*/
				90	static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
				91	{
				92	pgd_t *pgd;
				93	pud_t *pud;
				94	pmd_t *pmd;
				95	pte_t *pte;
				96
				97	pgd = swapper_pg_dir + pgd_index(vaddr);
				98	if (pgd_none(*pgd)) {
				99	BUG();
				100	return;
				101	}
				102	pud = pud_offset(pgd, vaddr);
				103	if (pud_none(*pud)) {
				104	BUG();
				105	return;
				106	}
				107	pmd = pmd_offset(pud, vaddr);
				108	if (pmd_none(*pmd)) {
				109	BUG();
				110	return;
				111	}
				112	pte = pte_offset_kernel(pmd, vaddr);
				113	/* <pfn,flags> stored as-is, to permit clearing entries */
				114	set_pte(pte, pfn_pte(pfn, flags));
				115
				116	/*
				117	* It's enough to flush this one mapping.
				118	* This appears conservative since it is only called
				119	* from __set_fixmap.
				120	*/
				121	local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
				122	}
				123
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	124	void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
				125	{
				126	unsigned long address = __fix_to_virt(idx);
				127
				128	if (idx >= __end_of_fixed_addresses) {
				129	BUG();
				130	return;
				131	}
				132	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
				133	}
				134
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	135	/**
				136	* shatter_huge_page() - ensure a given address is mapped by a small page.
				137	*
				138	* This function converts a huge PTE mapping kernel LOWMEM into a bunch
				139	* of small PTEs with the same caching. No cache flush required, but we
				140	* must do a global TLB flush.
				141	*
				142	* Any caller that wishes to modify a kernel mapping that might
				143	* have been made with a huge page should call this function,
				144	* since doing so properly avoids race conditions with installing the
				145	* newly-shattered page and then flushing all the TLB entries.
				146	*
				147	* @addr: Address at which to shatter any existing huge page.
				148	*/
				149	void shatter_huge_page(unsigned long addr)
				150	{
				151	pgd_t *pgd;
				152	pud_t *pud;
				153	pmd_t *pmd;
				154	unsigned long flags = 0; /* happy compiler */
				155	#ifdef __PAGETABLE_PMD_FOLDED
				156	struct list_head *pos;
				157	#endif
				158
				159	/* Get a pointer to the pmd entry that we need to change. */
				160	addr &= HPAGE_MASK;
				161	BUG_ON(pgd_addr_invalid(addr));
				162	BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */
				163	pgd = swapper_pg_dir + pgd_index(addr);
				164	pud = pud_offset(pgd, addr);
				165	BUG_ON(!pud_present(*pud));
				166	pmd = pmd_offset(pud, addr);
				167	BUG_ON(!pmd_present(*pmd));
				168	if (!pmd_huge_page(*pmd))
				169	return;
				170
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	171	spin_lock_irqsave(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	172	if (!pmd_huge_page(*pmd)) {
				173	/* Lost the race to convert the huge page. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	174	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	175	return;
				176	}
				177
				178	/* Shatter the huge page into the preallocated L2 page table. */
				179	pmd_populate_kernel(&init_mm, pmd,
				180	get_prealloc_pte(pte_pfn((pte_t )pmd)));
				181
				182	#ifdef __PAGETABLE_PMD_FOLDED
				183	/* Walk every pgd on the system and update the pmd there. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	184	spin_lock(&pgd_lock);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	185	list_for_each(pos, &pgd_list) {
				186	pmd_t *copy_pmd;
				187	pgd = list_to_pgd(pos) + pgd_index(addr);
				188	pud = pud_offset(pgd, addr);
				189	copy_pmd = pmd_offset(pud, addr);
				190	__set_pmd(copy_pmd, *pmd);
				191	}
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	192	spin_unlock(&pgd_lock);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	193	#endif
				194
				195	/* Tell every cpu to notice the change. */
				196	flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
				197	cpu_possible_mask, NULL, 0);
				198
				199	/* Hold the lock until the TLB flush is finished to avoid races. */
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	200	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	201	}
				202
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	203	/*
				204	* List of all pgd's needed so it can invalidate entries in both cached
				205	* and uncached pgd's. This is essentially codepath-based locking
				206	* against pageattr.c; it is the unique case in which a valid change
				207	* of kernel pagetables can't be lazily synchronized by vmalloc faults.
				208	* vmalloc faults work because attached pagetables are never freed.
Chris Metcalf	719ea79	2012-03-29 15:50:08 -0400	[diff] [blame]	209	*
				210	* The lock is always taken with interrupts disabled, unlike on x86
				211	* and other platforms, because we need to take the lock in
				212	* shatter_huge_page(), which may be called from an interrupt context.
				213	* We are not at risk from the tlbflush IPI deadlock that was seen on
				214	* x86, since we use the flush_remote() API to have the hypervisor do
				215	* the TLB flushes regardless of irq disabling.
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	216	*/
				217	DEFINE_SPINLOCK(pgd_lock);
				218	LIST_HEAD(pgd_list);
				219
				220	static inline void pgd_list_add(pgd_t *pgd)
				221	{
				222	list_add(pgd_to_list(pgd), &pgd_list);
				223	}
				224
				225	static inline void pgd_list_del(pgd_t *pgd)
				226	{
				227	list_del(pgd_to_list(pgd));
				228	}
				229
				230	#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET)
				231	#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START)
				232
				233	static void pgd_ctor(pgd_t *pgd)
				234	{
				235	unsigned long flags;
				236
				237	memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t));
				238	spin_lock_irqsave(&pgd_lock, flags);
				239
				240	#ifndef __tilegx__
				241	/*
				242	* Check that the user interrupt vector has no L2.
				243	* It never should for the swapper, and new page tables
				244	* should always start with an empty user interrupt vector.
				245	*/
				246	BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
				247	#endif
				248
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	249	memcpy(pgd + KERNEL_PGD_INDEX_START,
				250	swapper_pg_dir + KERNEL_PGD_INDEX_START,
				251	KERNEL_PGD_PTRS * sizeof(pgd_t));
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	252
				253	pgd_list_add(pgd);
				254	spin_unlock_irqrestore(&pgd_lock, flags);
				255	}
				256
				257	static void pgd_dtor(pgd_t *pgd)
				258	{
				259	unsigned long flags; /* can be called from interrupt context */
				260
				261	spin_lock_irqsave(&pgd_lock, flags);
				262	pgd_list_del(pgd);
				263	spin_unlock_irqrestore(&pgd_lock, flags);
				264	}
				265
				266	pgd_t pgd_alloc(struct mm_struct mm)
				267	{
				268	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
				269	if (pgd)
				270	pgd_ctor(pgd);
				271	return pgd;
				272	}
				273
				274	void pgd_free(struct mm_struct mm, pgd_t pgd)
				275	{
				276	pgd_dtor(pgd);
				277	kmem_cache_free(pgd_cache, pgd);
				278	}
				279
				280
				281	#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
				282
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	283	struct page pgtable_alloc_one(struct mm_struct mm, unsigned long address,
				284	int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	285	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	286	gfp_t flags = GFP_KERNEL\|__GFP_REPEAT\|__GFP_ZERO;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	287	struct page *p;
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	288	int i;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	289
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	290	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
				291	if (p == NULL)
				292	return NULL;
				293
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	294	/*
				295	* Make every page have a page_count() of one, not just the first.
				296	* We don't use __GFP_COMP since it doesn't look like it works
				297	* correctly with tlb_remove_page().
				298	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	299	for (i = 1; i < order; ++i) {
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	300	init_page_count(p+i);
				301	inc_zone_page_state(p+i, NR_PAGETABLE);
				302	}
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	303
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	304	pgtable_page_ctor(p);
				305	return p;
				306	}
				307
				308	/*
				309	* Free page immediately (used in __pte_alloc if we raced with another
				310	* process). We have to correct whatever pte_alloc_one() did before
				311	* returning the pages to the allocator.
				312	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	313	void pgtable_free(struct mm_struct mm, struct page p, int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	314	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	315	int i;
				316
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	317	pgtable_page_dtor(p);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	318	__free_page(p);
				319
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	320	for (i = 1; i < order; ++i) {
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	321	__free_page(p+i);
				322	dec_zone_page_state(p+i, NR_PAGETABLE);
				323	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	324	}
				325
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	326	void __pgtable_free_tlb(struct mmu_gather tlb, struct page pte,
				327	unsigned long address, int order)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	328	{
				329	int i;
				330
				331	pgtable_page_dtor(pte);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	332	tlb_remove_page(tlb, pte);
				333
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	334	for (i = 1; i < order; ++i) {
Peter Zijlstra	342d87e	2011-01-25 18:31:12 +0100	[diff] [blame]	335	tlb_remove_page(tlb, pte + i);
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	336	dec_zone_page_state(pte + i, NR_PAGETABLE);
				337	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	338	}
				339
				340	#ifndef __tilegx__
				341
				342	/*
				343	* FIXME: needs to be atomic vs hypervisor writes. For now we make the
				344	* window of vulnerability a bit smaller by doing an unlocked 8-bit update.
				345	*/
				346	int ptep_test_and_clear_young(struct vm_area_struct *vma,
				347	unsigned long addr, pte_t *ptep)
				348	{
				349	#if HV_PTE_INDEX_ACCESSED < 8 \|\| HV_PTE_INDEX_ACCESSED >= 16
				350	# error Code assumes HV_PTE "accessed" bit in second byte
				351	#endif
				352	u8 tmp = (u8 )ptep;
				353	u8 second_byte = tmp[1];
				354	if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8))))
				355	return 0;
				356	tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8));
				357	return 1;
				358	}
				359
				360	/*
				361	* This implementation is atomic vs hypervisor writes, since the hypervisor
				362	* always writes the low word (where "accessed" and "dirty" are) and this
				363	* routine only writes the high word.
				364	*/
				365	void ptep_set_wrprotect(struct mm_struct *mm,
				366	unsigned long addr, pte_t *ptep)
				367	{
				368	#if HV_PTE_INDEX_WRITABLE < 32
				369	# error Code assumes HV_PTE "writable" bit in high word
				370	#endif
				371	u32 tmp = (u32 )ptep;
				372	tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32));
				373	}
				374
				375	#endif
				376
				377	pte_t virt_to_pte(struct mm_struct mm, unsigned long addr)
				378	{
				379	pgd_t *pgd;
				380	pud_t *pud;
				381	pmd_t *pmd;
				382
				383	if (pgd_addr_invalid(addr))
				384	return NULL;
				385
				386	pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr);
				387	pud = pud_offset(pgd, addr);
				388	if (!pud_present(*pud))
				389	return NULL;
				390	pmd = pmd_offset(pud, addr);
				391	if (pmd_huge_page(*pmd))
				392	return (pte_t *)pmd;
				393	if (!pmd_present(*pmd))
				394	return NULL;
				395	return pte_offset_kernel(pmd, addr);
				396	}
				397
				398	pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
				399	{
				400	unsigned int width = smp_width;
				401	int x = cpu % width;
				402	int y = cpu / width;
				403	BUG_ON(y >= smp_height);
				404	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
				405	BUG_ON(cpu < 0 \|\| cpu >= NR_CPUS);
				406	BUG_ON(!cpu_is_valid_lotar(cpu));
				407	return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y));
				408	}
				409
				410	int get_remote_cache_cpu(pgprot_t prot)
				411	{
				412	HV_LOTAR lotar = hv_pte_get_lotar(prot);
				413	int x = HV_LOTAR_X(lotar);
				414	int y = HV_LOTAR_Y(lotar);
				415	BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3);
				416	return x + y * smp_width;
				417	}
				418
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	419	/*
				420	* Convert a kernel VA to a PA and homing information.
				421	*/
				422	int va_to_cpa_and_pte(void va, unsigned long long cpa, pte_t *pte)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	423	{
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	424	struct page *page = virt_to_page(va);
				425	pte_t null_pte = { 0 };
				426
				427	*cpa = __pa(va);
				428
				429	/* Note that this is not writing a page table, just returning a pte. */
				430	*pte = pte_set_home(null_pte, page_home(page));
				431
				432	return 0; /* return non-zero if not hfh? */
				433	}
				434	EXPORT_SYMBOL(va_to_cpa_and_pte);
				435
				436	void __set_pte(pte_t *ptep, pte_t pte)
				437	{
				438	#ifdef __tilegx__
				439	*ptep = pte;
				440	#else
				441	# if HV_PTE_INDEX_PRESENT >= 32 \|\| HV_PTE_INDEX_MIGRATING >= 32
				442	# error Must write the present and migrating bits last
				443	# endif
				444	if (pte_present(pte)) {
				445	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
				446	barrier();
				447	((u32 *)ptep)[0] = (u32)(pte_val(pte));
				448	} else {
				449	((u32 *)ptep)[0] = (u32)(pte_val(pte));
				450	barrier();
				451	((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
				452	}
				453	#endif /* __tilegx__ */
				454	}
				455
				456	void set_pte(pte_t *ptep, pte_t pte)
				457	{
Chris Metcalf	12400f1	2012-03-29 15:36:53 -0400	[diff] [blame]	458	if (pte_present(pte) &&
				459	(!CHIP_HAS_MMIO() \|\| hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
				460	/* The PTE actually references physical memory. */
				461	unsigned long pfn = pte_pfn(pte);
				462	if (pfn_valid(pfn)) {
				463	/* Update the home of the PTE from the struct page. */
				464	pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
				465	} else if (hv_pte_get_mode(pte) == 0) {
				466	/* remap_pfn_range(), etc, must supply PTE mode. */
				467	panic("set_pte(): out-of-range PFN and mode 0\n");
				468	}
				469	}
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	470
Chris Metcalf	76c567f	2011-02-28 16:37:34 -0500	[diff] [blame]	471	__set_pte(ptep, pte);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	472	}
				473
				474	/* Can this mm load a PTE with cached_priority set? */
				475	static inline int mm_is_priority_cached(struct mm_struct *mm)
				476	{
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	477	return mm->context.priority_cached != 0;
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	478	}
				479
				480	/*
				481	* Add a priority mapping to an mm_context and
				482	* notify the hypervisor if this is the first one.
				483	*/
				484	void start_mm_caching(struct mm_struct *mm)
				485	{
				486	if (!mm_is_priority_cached(mm)) {
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	487	mm->context.priority_cached = -1UL;
				488	hv_set_caching(-1UL);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	489	}
				490	}
				491
				492	/*
				493	* Validate and return the priority_cached flag. We know if it's zero
				494	* that we don't need to scan, since we immediately set it non-zero
				495	* when we first consider a MAP_CACHE_PRIORITY mapping.
				496	*
				497	* We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it,
				498	* since we're in an interrupt context (servicing switch_mm) we don't
				499	* worry about it and don't unset the "priority_cached" field.
				500	* Presumably we'll come back later and have more luck and clear
				501	* the value then; for now we'll just keep the cache marked for priority.
				502	*/
Chris Metcalf	d5d14ed	2012-03-29 13:58:43 -0400	[diff] [blame]	503	static unsigned long update_priority_cached(struct mm_struct *mm)
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	504	{
				505	if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
				506	struct vm_area_struct *vm;
				507	for (vm = mm->mmap; vm; vm = vm->vm_next) {
				508	if (hv_pte_get_cached_priority(vm->vm_page_prot))
				509	break;
				510	}
				511	if (vm == NULL)
				512	mm->context.priority_cached = 0;
				513	up_write(&mm->mmap_sem);
				514	}
				515	return mm->context.priority_cached;
				516	}
				517
				518	/* Set caching correctly for an mm that we are switching to. */
				519	void check_mm_caching(struct mm_struct prev, struct mm_struct next)
				520	{
				521	if (!mm_is_priority_cached(next)) {
				522	/*
				523	* If the new mm doesn't use priority caching, just see if we
				524	* need the hv_set_caching(), or can assume it's already zero.
				525	*/
				526	if (mm_is_priority_cached(prev))
				527	hv_set_caching(0);
				528	} else {
				529	hv_set_caching(update_priority_cached(next));
				530	}
				531	}
				532
				533	#if CHIP_HAS_MMIO()
				534
				535	/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */
				536	void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
				537	pgprot_t home)
				538	{
				539	void *addr;
				540	struct vm_struct *area;
				541	unsigned long offset, last_addr;
				542	pgprot_t pgprot;
				543
				544	/* Don't allow wraparound or zero size */
				545	last_addr = phys_addr + size - 1;
				546	if (!size \|\| last_addr < phys_addr)
				547	return NULL;
				548
				549	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
				550	pgprot = PAGE_KERNEL;
				551	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
				552	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));
				553
				554	/*
				555	* Mappings have to be page-aligned
				556	*/
				557	offset = phys_addr & ~PAGE_MASK;
				558	phys_addr &= PAGE_MASK;
				559	size = PAGE_ALIGN(last_addr+1) - phys_addr;
				560
				561	/*
				562	* Ok, go for it..
				563	*/
				564	area = get_vm_area(size, VM_IOREMAP /* \| other flags? */);
				565	if (!area)
				566	return NULL;
				567	area->phys_addr = phys_addr;
				568	addr = area->addr;
				569	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
				570	phys_addr, pgprot)) {
				571	remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
				572	return NULL;
				573	}
				574	return (__force void __iomem ) (offset + (char )addr);
				575	}
				576	EXPORT_SYMBOL(ioremap_prot);
				577
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	578	/* Unmap an MMIO VA mapping. */
				579	void iounmap(volatile void __iomem *addr_in)
				580	{
				581	volatile void __iomem addr = (volatile void __iomem )
				582	(PAGE_MASK & (unsigned long __force)addr_in);
				583	#if 1
				584	vunmap((void * __force)addr);
				585	#else
				586	/* x86 uses this complicated flow instead of vunmap(). Is
				587	* there any particular reason we should do the same? */
				588	struct vm_struct p, o;
				589
				590	/* Use the vm area unlocked, assuming the caller
				591	ensures there isn't another iounmap for the same address
				592	in parallel. Reuse of the virtual address is prevented by
				593	leaving it in the global lists until we're done with it.
				594	cpa takes care of the direct mappings. */
				595	read_lock(&vmlist_lock);
				596	for (p = vmlist; p; p = p->next) {
				597	if (p->addr == addr)
				598	break;
				599	}
				600	read_unlock(&vmlist_lock);
				601
				602	if (!p) {
Chris Metcalf	0707ad3	2010-06-25 17:04:17 -0400	[diff] [blame]	603	pr_err("iounmap: bad address %p\n", addr);
Chris Metcalf	867e359	2010-05-28 23:09:12 -0400	[diff] [blame]	604	dump_stack();
				605	return;
				606	}
				607
				608	/* Finally remove it */
				609	o = remove_vm_area((void *)addr);
				610	BUG_ON(p != o \|\| o == NULL);
				611	kfree(p);
				612	#endif
				613	}
				614	EXPORT_SYMBOL(iounmap);
				615
				616	#endif /* CHIP_HAS_MMIO() */