Blame - arch/powerpc/mm/pgtable_64.c - kernel/msm-5.4

blob: cdb19ab859d31f672fa02e4e44b68b7bb687b1bb [file] [log] [blame]

Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	1	/*
				2	* This file contains ioremap and related functions for 64-bit machines.
				3	*
				4	* Derived from arch/ppc64/mm/init.c
				5	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
				6	*
				7	* Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
				8	* and Cort Dougan (PReP) (cort@cs.nmt.edu)
				9	* Copyright (C) 1996 Paul Mackerras
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	10	*
				11	* Derived from "arch/i386/mm/init.c"
				12	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				13	*
				14	* Dave Engebretsen <engebret@us.ibm.com>
				15	* Rework for PPC64 port.
				16	*
				17	* This program is free software; you can redistribute it and/or
				18	* modify it under the terms of the GNU General Public License
				19	* as published by the Free Software Foundation; either version
				20	* 2 of the License, or (at your option) any later version.
				21	*
				22	*/
				23
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	24	#include <linux/signal.h>
				25	#include <linux/sched.h>
				26	#include <linux/kernel.h>
				27	#include <linux/errno.h>
				28	#include <linux/string.h>
Paul Gortmaker	66b15db	2011-05-27 10:46:24 -0400	[diff] [blame]	29	#include <linux/export.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	30	#include <linux/types.h>
				31	#include <linux/mman.h>
				32	#include <linux/mm.h>
				33	#include <linux/swap.h>
				34	#include <linux/stddef.h>
				35	#include <linux/vmalloc.h>
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	36	#include <linux/bootmem.h>
Yinghai Lu	95f72d1	2010-07-12 14:36:09 +1000	[diff] [blame]	37	#include <linux/memblock.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	38	#include <linux/slab.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	39
				40	#include <asm/pgalloc.h>
				41	#include <asm/page.h>
				42	#include <asm/prom.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	43	#include <asm/io.h>
				44	#include <asm/mmu_context.h>
				45	#include <asm/pgtable.h>
				46	#include <asm/mmu.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	47	#include <asm/smp.h>
				48	#include <asm/machdep.h>
				49	#include <asm/tlb.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	50	#include <asm/processor.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	51	#include <asm/cputable.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	52	#include <asm/sections.h>
Stephen Rothwell	5e203d6	2006-09-25 13:36:31 +1000	[diff] [blame]	53	#include <asm/firmware.h>
David Gibson	800fc3e	2005-11-16 15:43:48 +1100	[diff] [blame]	54
				55	#include "mmu_decl.h"
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	56
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	57	#define CREATE_TRACE_POINTS
				58	#include <trace/events/thp.h>
				59
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	60	/* Some sanity checking */
				61	#if TASK_SIZE_USER64 > PGTABLE_RANGE
				62	#error TASK_SIZE_USER64 exceeds pagetable range
				63	#endif
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	64
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	65	#ifdef CONFIG_PPC_STD_MMU_64
Aneesh Kumar K.V	af81d78	2013-03-13 03:34:55 +0000	[diff] [blame]	66	#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	67	#error TASK_SIZE_USER64 exceeds user VSID range
				68	#endif
				69	#endif
				70
				71	unsigned long ioremap_bot = IOREMAP_BASE;
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	72
				73	#ifdef CONFIG_PPC_MMU_NOHASH
Scott Wood	7d17622	2014-08-01 22:07:40 -0500	[diff] [blame]	74	static __ref void *early_alloc_pgtable(unsigned long size)
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	75	{
				76	void *pt;
				77
Anton Blanchard	1023973	2014-09-17 22:15:33 +1000	[diff] [blame^]	78	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	79	memset(pt, 0, size);
				80
				81	return pt;
				82	}
				83	#endif /* CONFIG_PPC_MMU_NOHASH */
				84
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	85	/*
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	86	* map_kernel_page currently only called by __ioremap
				87	* map_kernel_page adds an entry to the ioremap page table
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	88	* and adds an entry to the HPT, possibly bolting it
				89	*/
Benjamin Herrenschmidt	32a7494	2009-07-23 23:15:58 +0000	[diff] [blame]	90	int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	91	{
				92	pgd_t *pgdp;
				93	pud_t *pudp;
				94	pmd_t *pmdp;
				95	pte_t *ptep;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	96
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	97	if (slab_is_available()) {
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	98	pgdp = pgd_offset_k(ea);
				99	pudp = pud_alloc(&init_mm, pgdp, ea);
				100	if (!pudp)
				101	return -ENOMEM;
				102	pmdp = pmd_alloc(&init_mm, pudp, ea);
				103	if (!pmdp)
				104	return -ENOMEM;
Paul Mackerras	23fd077	2005-10-31 13:37:12 +1100	[diff] [blame]	105	ptep = pte_alloc_kernel(pmdp, ea);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	106	if (!ptep)
				107	return -ENOMEM;
				108	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
				109	__pgprot(flags)));
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	110	} else {
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	111	#ifdef CONFIG_PPC_MMU_NOHASH
				112	/* Warning ! This will blow up if bootmem is not initialized
				113	* which our ppc64 code is keen to do that, we'll need to
				114	* fix it and/or be more careful
				115	*/
				116	pgdp = pgd_offset_k(ea);
				117	#ifdef PUD_TABLE_SIZE
				118	if (pgd_none(*pgdp)) {
				119	pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
				120	BUG_ON(pudp == NULL);
				121	pgd_populate(&init_mm, pgdp, pudp);
				122	}
				123	#endif /* PUD_TABLE_SIZE */
				124	pudp = pud_offset(pgdp, ea);
				125	if (pud_none(*pudp)) {
				126	pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
				127	BUG_ON(pmdp == NULL);
				128	pud_populate(&init_mm, pudp, pmdp);
				129	}
				130	pmdp = pmd_offset(pudp, ea);
				131	if (!pmd_present(*pmdp)) {
				132	ptep = early_alloc_pgtable(PAGE_SIZE);
				133	BUG_ON(ptep == NULL);
				134	pmd_populate_kernel(&init_mm, pmdp, ptep);
				135	}
				136	ptep = pte_offset_kernel(pmdp, ea);
				137	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
				138	__pgprot(flags)));
				139	#else /* CONFIG_PPC_MMU_NOHASH */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	140	/*
				141	* If the mm subsystem is not fully up, we cannot create a
				142	* linux page table entry for this mapping. Simply bolt an
				143	* entry in the hardware page table.
Benjamin Herrenschmidt	3c726f8	2005-11-07 11:06:55 +1100	[diff] [blame]	144	*
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	145	*/
Paul Mackerras	1189be6	2007-10-11 20:37:10 +1000	[diff] [blame]	146	if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
				147	mmu_io_psize, mmu_kernel_ssize)) {
Benjamin Herrenschmidt	77ac166	2005-11-10 11:12:11 +1100	[diff] [blame]	148	printk(KERN_ERR "Failed to do bolted mapping IO "
				149	"memory at %016lx !\n", pa);
				150	return -ENOMEM;
				151	}
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	152	#endif /* !CONFIG_PPC_MMU_NOHASH */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	153	}
Scott Wood	47ce8af	2013-10-11 19:22:37 -0500	[diff] [blame]	154
				155	#ifdef CONFIG_PPC_BOOK3E_64
				156	/*
				157	* With hardware tablewalk, a sync is needed to ensure that
				158	* subsequent accesses see the PTE we just wrote. Unlike userspace
				159	* mappings, we can't tolerate spurious faults, so make sure
				160	* the new PTE will be seen the first time.
				161	*/
				162	mb();
				163	#else
				164	smp_wmb();
				165	#endif
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	166	return 0;
				167	}
				168
				169
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	170	/**
				171	* __ioremap_at - Low level function to establish the page tables
				172	* for an IO mapping
				173	*/
				174	void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	175	unsigned long flags)
				176	{
				177	unsigned long i;
				178
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	179	/* Make sure we have the base flags */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	180	if ((flags & _PAGE_PRESENT) == 0)
				181	flags \|= pgprot_val(PAGE_KERNEL);
				182
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	183	/* Non-cacheable page cannot be coherent */
				184	if (flags & _PAGE_NO_CACHE)
				185	flags &= ~_PAGE_COHERENT;
				186
				187	/* We don't support the 4K PFN hack with ioremap */
				188	if (flags & _PAGE_4K_PFN)
				189	return NULL;
				190
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	191	WARN_ON(pa & ~PAGE_MASK);
				192	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
				193	WARN_ON(size & ~PAGE_MASK);
				194
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	195	for (i = 0; i < size; i += PAGE_SIZE)
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	196	if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	197	return NULL;
				198
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	199	return (void __iomem *)ea;
				200	}
				201
				202	/**
				203	* __iounmap_from - Low level function to tear down the page tables
				204	* for an IO mapping. This is used for mappings that
				205	* are manipulated manually, like partial unmapping of
				206	* PCI IOs or ISA space.
				207	*/
				208	void __iounmap_at(void *ea, unsigned long size)
				209	{
				210	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
				211	WARN_ON(size & ~PAGE_MASK);
				212
				213	unmap_kernel_range((unsigned long)ea, size);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	214	}
				215
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	216	void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				217	unsigned long flags, void *caller)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	218	{
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	219	phys_addr_t paligned;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	220	void __iomem *ret;
				221
				222	/*
				223	* Choose an address to map it to.
				224	* Once the imalloc system is running, we use it.
				225	* Before that, we map using addresses going
				226	* up from ioremap_bot. imalloc will use
				227	* the addresses from ioremap_bot through
				228	* IMALLOC_END
				229	*
				230	*/
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	231	paligned = addr & PAGE_MASK;
				232	size = PAGE_ALIGN(addr + size) - paligned;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	233
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	234	if ((size == 0) \|\| (paligned == 0))
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	235	return NULL;
				236
				237	if (mem_init_done) {
				238	struct vm_struct *area;
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	239
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	240	area = __get_vm_area_caller(size, VM_IOREMAP,
				241	ioremap_bot, IOREMAP_END,
				242	caller);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	243	if (area == NULL)
				244	return NULL;
Michael Ellerman	7a9d125	2010-11-28 18:26:36 +0000	[diff] [blame]	245
				246	area->phys_addr = paligned;
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	247	ret = __ioremap_at(paligned, area->addr, size, flags);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	248	if (!ret)
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	249	vunmap(area->addr);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	250	} else {
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	251	ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	252	if (ret)
				253	ioremap_bot += size;
				254	}
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	255
				256	if (ret)
				257	ret += addr & ~PAGE_MASK;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	258	return ret;
				259	}
				260
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	261	void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
				262	unsigned long flags)
				263	{
				264	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
				265	}
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	266
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	267	void __iomem * ioremap(phys_addr_t addr, unsigned long size)
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	268	{
				269	unsigned long flags = _PAGE_NO_CACHE \| _PAGE_GUARDED;
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	270	void *caller = __builtin_return_address(0);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	271
				272	if (ppc_md.ioremap)
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	273	return ppc_md.ioremap(addr, size, flags, caller);
				274	return __ioremap_caller(addr, size, flags, caller);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	275	}
				276
Anton Blanchard	be135f4	2011-05-08 21:41:59 +0000	[diff] [blame]	277	void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
				278	{
				279	unsigned long flags = _PAGE_NO_CACHE;
				280	void *caller = __builtin_return_address(0);
				281
				282	if (ppc_md.ioremap)
				283	return ppc_md.ioremap(addr, size, flags, caller);
				284	return __ioremap_caller(addr, size, flags, caller);
				285	}
				286
Anton Blanchard	40f1ce7	2011-05-08 21:43:47 +0000	[diff] [blame]	287	void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	288	unsigned long flags)
				289	{
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	290	void *caller = __builtin_return_address(0);
				291
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	292	/* writeable implies dirty for kernel addresses */
				293	if (flags & _PAGE_RW)
				294	flags \|= _PAGE_DIRTY;
				295
				296	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
				297	flags &= ~(_PAGE_USER \| _PAGE_EXEC);
				298
Benjamin Herrenschmidt	55052ee	2010-04-07 14:39:36 +1000	[diff] [blame]	299	#ifdef _PAGE_BAP_SR
				300	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
				301	* which means that we just cleared supervisor access... oops ;-) This
				302	* restores it
				303	*/
				304	flags \|= _PAGE_BAP_SR;
				305	#endif
				306
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	307	if (ppc_md.ioremap)
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	308	return ppc_md.ioremap(addr, size, flags, caller);
				309	return __ioremap_caller(addr, size, flags, caller);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	310	}
				311
				312
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	313	/*
				314	* Unmap an IO region and remove it from imalloc'd list.
				315	* Access to IO memory should be serialized by driver.
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	316	*/
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	317	void __iounmap(volatile void __iomem *token)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	318	{
				319	void *addr;
				320
				321	if (!mem_init_done)
				322	return;
				323
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	324	addr = (void *) ((unsigned long __force)
				325	PCI_FIX_ADDR(token) & PAGE_MASK);
				326	if ((unsigned long)addr < ioremap_bot) {
				327	printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
				328	" at 0x%p\n", addr);
				329	return;
				330	}
				331	vunmap(addr);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	332	}
				333
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	334	void iounmap(volatile void __iomem *token)
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	335	{
				336	if (ppc_md.iounmap)
				337	ppc_md.iounmap(token);
				338	else
				339	__iounmap(token);
				340	}
				341
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	342	EXPORT_SYMBOL(ioremap);
Anton Blanchard	be135f4	2011-05-08 21:41:59 +0000	[diff] [blame]	343	EXPORT_SYMBOL(ioremap_wc);
Anton Blanchard	40f1ce7	2011-05-08 21:43:47 +0000	[diff] [blame]	344	EXPORT_SYMBOL(ioremap_prot);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	345	EXPORT_SYMBOL(__ioremap);
Olof Johansson	a302cb9	2007-08-31 13:58:51 +1000	[diff] [blame]	346	EXPORT_SYMBOL(__ioremap_at);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	347	EXPORT_SYMBOL(iounmap);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	348	EXPORT_SYMBOL(__iounmap);
Olof Johansson	a302cb9	2007-08-31 13:58:51 +1000	[diff] [blame]	349	EXPORT_SYMBOL(__iounmap_at);
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	350
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	351	/*
				352	* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
				353	* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
				354	*/
				355	struct page *pmd_page(pmd_t pmd)
				356	{
				357	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
				358	if (pmd_trans_huge(pmd))
				359	return pfn_to_page(pmd_pfn(pmd));
				360	#endif
				361	return virt_to_page(pmd_page_vaddr(pmd));
				362	}
				363
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	364	#ifdef CONFIG_PPC_64K_PAGES
				365	static pte_t get_from_cache(struct mm_struct mm)
				366	{
				367	void pte_frag, ret;
				368
				369	spin_lock(&mm->page_table_lock);
				370	ret = mm->context.pte_frag;
				371	if (ret) {
				372	pte_frag = ret + PTE_FRAG_SIZE;
				373	/*
				374	* If we have taken up all the fragments mark PTE page NULL
				375	*/
				376	if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
				377	pte_frag = NULL;
				378	mm->context.pte_frag = pte_frag;
				379	}
				380	spin_unlock(&mm->page_table_lock);
				381	return (pte_t *)ret;
				382	}
				383
				384	static pte_t __alloc_for_cache(struct mm_struct mm, int kernel)
				385	{
				386	void *ret = NULL;
				387	struct page *page = alloc_page(GFP_KERNEL \| __GFP_NOTRACK \|
				388	__GFP_REPEAT \| __GFP_ZERO);
				389	if (!page)
				390	return NULL;
Kirill A. Shutemov	4f804943	2013-11-14 14:31:38 -0800	[diff] [blame]	391	if (!kernel && !pgtable_page_ctor(page)) {
				392	__free_page(page);
				393	return NULL;
				394	}
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	395
				396	ret = page_address(page);
				397	spin_lock(&mm->page_table_lock);
				398	/*
				399	* If we find pgtable_page set, we return
				400	* the allocated page with single fragement
				401	* count.
				402	*/
				403	if (likely(!mm->context.pte_frag)) {
				404	atomic_set(&page->_count, PTE_FRAG_NR);
				405	mm->context.pte_frag = ret + PTE_FRAG_SIZE;
				406	}
				407	spin_unlock(&mm->page_table_lock);
				408
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	409	return (pte_t *)ret;
				410	}
				411
				412	pte_t page_table_alloc(struct mm_struct mm, unsigned long vmaddr, int kernel)
				413	{
				414	pte_t *pte;
				415
				416	pte = get_from_cache(mm);
				417	if (pte)
				418	return pte;
				419
				420	return __alloc_for_cache(mm, kernel);
				421	}
				422
				/*
 * page_table_free - drop one reference on the page backing PTE fragment
 * @table.  The page is freed when that was the last reference; the
 * page-table destructor runs first unless @kernel is non-zero.
 * @mm is not used here.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);

	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}
				432
				433	#ifdef CONFIG_SMP
				/*
 * Drop one reference on the page holding PTE fragment @table; when it was
 * the last one, run the page-table destructor and free the page.
 */
static void page_table_free_rcu(void *table)
{
	struct page *pg = virt_to_page(table);

	if (!put_page_testzero(pg))
		return;

	pgtable_page_dtor(pg);
	free_hot_cold_page(pg, 0);
}
				442
				443	void pgtable_free_tlb(struct mmu_gather tlb, void table, int shift)
				444	{
				445	unsigned long pgf = (unsigned long)table;
				446
				447	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				448	pgf \|= shift;
				449	tlb_remove_table(tlb, (void *)pgf);
				450	}
				451
				452	void __tlb_remove_table(void *_table)
				453	{
				454	void table = (void )((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
				455	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
				456
				457	if (!shift)
				458	/* PTE page needs special handling */
				459	page_table_free_rcu(table);
				460	else {
				461	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				462	kmem_cache_free(PGT_CACHE(shift), table);
				463	}
				464	}
				465	#else
				466	void pgtable_free_tlb(struct mmu_gather tlb, void table, int shift)
				467	{
				468	if (!shift) {
				469	/* PTE page needs special handling */
				470	struct page *page = virt_to_page(table);
				471	if (put_page_testzero(page)) {
				472	pgtable_page_dtor(page);
				473	free_hot_cold_page(page, 0);
				474	}
				475	} else {
				476	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				477	kmem_cache_free(PGT_CACHE(shift), table);
				478	}
				479	}
				480	#endif
				481	#endif /* CONFIG_PPC_64K_PAGES */
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	482
				483	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
				484
				485	/*
				486	* This is called when relaxing access to a hugepage. It's also called in the page
				487	* fault path when we don't hit any of the major fault cases, ie, a minor
				488	* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
				489	* handled those two for us, we additionally deal with missing execute
				490	* permission here on some processors
				491	*/
				492	int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
				493	pmd_t *pmdp, pmd_t entry, int dirty)
				494	{
				495	int changed;
				496	#ifdef CONFIG_DEBUG_VM
				497	WARN_ON(!pmd_trans_huge(*pmdp));
				498	assert_spin_locked(&vma->vm_mm->page_table_lock);
				499	#endif
				500	changed = !pmd_same(*(pmdp), entry);
				501	if (changed) {
				502	__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
				503	/*
				504	* Since we are not supporting SW TLB systems, we don't
				505	* have any thing similar to flush_tlb_page_nohash()
				506	*/
				507	}
				508	return changed;
				509	}
				510
				511	unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	512	pmd_t *pmdp, unsigned long clr,
				513	unsigned long set)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	514	{
				515
				516	unsigned long old, tmp;
				517
				518	#ifdef CONFIG_DEBUG_VM
				519	WARN_ON(!pmd_trans_huge(*pmdp));
				520	assert_spin_locked(&mm->page_table_lock);
				521	#endif
				522
				523	#ifdef PTE_ATOMIC_UPDATES
				524	__asm__ __volatile__(
				525	"1: ldarx %0,0,%3\n\
				526	andi. %1,%0,%6\n\
				527	bne- 1b \n\
				528	andc %1,%0,%4 \n\
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	529	or %1,%1,%7\n\
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	530	stdcx. %1,0,%3 \n\
				531	bne- 1b"
				532	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	533	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	534	: "cc" );
				535	#else
				536	old = pmd_val(*pmdp);
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	537	*pmdp = __pmd((old & ~clr) \| set);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	538	#endif
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	539	trace_hugepage_update(addr, old, clr, set);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	540	if (old & _PAGE_HASHPTE)
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	541	hpte_do_hugepage_flush(mm, addr, pmdp, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	542	return old;
				543	}
				544
				545	pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
				546	pmd_t *pmdp)
				547	{
				548	pmd_t pmd;
				549
				550	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
				551	if (pmd_trans_huge(*pmdp)) {
				552	pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
				553	} else {
				554	/*
				555	* khugepaged calls this for normal pmd
				556	*/
				557	pmd = *pmdp;
				558	pmd_clear(pmdp);
				559	/*
				560	* Wait for all pending hash_page to finish. This is needed
				561	* in case of subpage collapse. When we collapse normal pages
				562	* to hugepage, we first clear the pmd, then invalidate all
				563	* the PTE entries. The assumption here is that any low level
				564	* page fault will see a none pmd and take the slow path that
				565	* will wait on mmap_sem. But we could very well be in a
				566	* hash_page with local ptep pointer value. Such a hash page
				567	* can result in adding new HPTE entries for normal subpages.
				568	* That means we could be modifying the page content as we
				569	* copy them to a huge page. So wait for parallel hash_page
				570	* to finish before invalidating HPTE entries. We can do this
				571	* by sending an IPI to all the cpus and executing a dummy
				572	* function there.
				573	*/
				574	kick_all_cpus_sync();
				575	/*
				576	* Now invalidate the hpte entries in the range
				577	* covered by pmd. This make sure we take a
				578	* fault and will find the pmd as none, which will
				579	* result in a major fault which takes mmap_sem and
				580	* hence wait for collapse to complete. Without this
				581	* the __collapse_huge_page_copy can result in copying
				582	* the old content.
				583	*/
				584	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
				585	}
				586	return pmd;
				587	}
				588
				589	int pmdp_test_and_clear_young(struct vm_area_struct *vma,
				590	unsigned long address, pmd_t *pmdp)
				591	{
				592	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
				593	}
				594
				595	/*
				596	* We currently remove entries from the hashtable regardless of whether
				597	* the entry was young or dirty. The generic routines only flush if the
				598	* entry was young or dirty which is not good enough.
				599	*
				600	* We should be more intelligent about this but for the moment we override
				601	* these functions and force a tlb flush unconditionally
				602	*/
				603	int pmdp_clear_flush_young(struct vm_area_struct *vma,
				604	unsigned long address, pmd_t *pmdp)
				605	{
				606	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
				607	}
				608
				609	/*
				610	* We mark the pmd splitting and invalidate all the hpte
				611	* entries for this hugepage.
				612	*/
				613	void pmdp_splitting_flush(struct vm_area_struct *vma,
				614	unsigned long address, pmd_t *pmdp)
				615	{
				616	unsigned long old, tmp;
				617
				618	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
				619
				620	#ifdef CONFIG_DEBUG_VM
				621	WARN_ON(!pmd_trans_huge(*pmdp));
				622	assert_spin_locked(&vma->vm_mm->page_table_lock);
				623	#endif
				624
				625	#ifdef PTE_ATOMIC_UPDATES
				626
				627	__asm__ __volatile__(
				628	"1: ldarx %0,0,%3\n\
				629	andi. %1,%0,%6\n\
				630	bne- 1b \n\
				631	ori %1,%0,%4 \n\
				632	stdcx. %1,0,%3 \n\
				633	bne- 1b"
				634	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
				635	: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
				636	: "cc" );
				637	#else
				638	old = pmd_val(*pmdp);
				639	*pmdp = __pmd(old \| _PAGE_SPLITTING);
				640	#endif
				641	/*
				642	* If we didn't had the splitting flag set, go and flush the
				643	* HPTE entries.
				644	*/
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	645	trace_hugepage_splitting(address, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	646	if (!(old & _PAGE_SPLITTING)) {
				647	/* We need to flush the hpte */
				648	if (old & _PAGE_HASHPTE)
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	649	hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	650	}
Aneesh Kumar K.V	346519a	2014-03-15 16:17:58 +0530	[diff] [blame]	651	/*
				652	* This ensures that generic code that rely on IRQ disabling
				653	* to prevent a parallel THP split work as expected.
				654	*/
				655	kick_all_cpus_sync();
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	656	}
				657
				658	/*
				659	* We want to put the pgtable in pmd and use pgtable for tracking
				660	* the base page size hptes
				661	*/
				662	void pgtable_trans_huge_deposit(struct mm_struct mm, pmd_t pmdp,
				663	pgtable_t pgtable)
				664	{
				665	pgtable_t *pgtable_slot;
				666	assert_spin_locked(&mm->page_table_lock);
				667	/*
				668	* we store the pgtable in the second half of PMD
				669	*/
				670	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				671	*pgtable_slot = pgtable;
				672	/*
				673	* expose the deposited pgtable to other cpus.
				674	* before we set the hugepage PTE at pmd level
				675	* hash fault code looks at the deposted pgtable
				676	* to store hash index values.
				677	*/
				678	smp_wmb();
				679	}
				680
				681	pgtable_t pgtable_trans_huge_withdraw(struct mm_struct mm, pmd_t pmdp)
				682	{
				683	pgtable_t pgtable;
				684	pgtable_t *pgtable_slot;
				685
				686	assert_spin_locked(&mm->page_table_lock);
				687	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				688	pgtable = *pgtable_slot;
				689	/*
				690	* Once we withdraw, mark the entry NULL.
				691	*/
				692	*pgtable_slot = NULL;
				693	/*
				694	* We store HPTE information in the deposited PTE fragment.
				695	* zero out the content on withdraw.
				696	*/
				697	memset(pgtable, 0, PTE_FRAG_SIZE);
				698	return pgtable;
				699	}
				700
				701	/*
				702	* set a new huge pmd. We should not be called for updating
				703	* an existing pmd entry. That should go via pmd_hugepage_update.
				704	*/
				705	void set_pmd_at(struct mm_struct *mm, unsigned long addr,
				706	pmd_t *pmdp, pmd_t pmd)
				707	{
				708	#ifdef CONFIG_DEBUG_VM
Aneesh Kumar K.V	8937ba4	2013-11-18 14:58:12 +0530	[diff] [blame]	709	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	710	assert_spin_locked(&mm->page_table_lock);
				711	WARN_ON(!pmd_trans_huge(pmd));
				712	#endif
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	713	trace_hugepage_set_pmd(addr, pmd);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	714	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
				715	}
				716
				717	void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
				718	pmd_t *pmdp)
				719	{
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	720	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	721	}
				722
				723	/*
				724	* A linux hugepage PMD was changed and the corresponding hash table entries
				725	* neesd to be flushed.
				726	*/
				727	void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	728	pmd_t *pmdp, unsigned long old_pmd)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	729	{
				730	int ssize, i;
				731	unsigned long s_addr;
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	732	int max_hpte_count;
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	733	unsigned int psize, valid;
				734	unsigned char *hpte_slot_array;
				735	unsigned long hidx, vpn, vsid, hash, shift, slot;
				736
				737	/*
				738	* Flush all the hptes mapping this hugepage
				739	*/
				740	s_addr = addr & HPAGE_PMD_MASK;
				741	hpte_slot_array = get_hpte_slot_array(pmdp);
				742	/*
				743	* IF we try to do a HUGE PTE update after a withdraw is done.
				744	* we will find the below NULL. This happens when we do
				745	* split_huge_page_pmd
				746	*/
				747	if (!hpte_slot_array)
				748	return;
				749
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	750	/* get the base page size,vsid and segment size */
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	751	#ifdef CONFIG_DEBUG_VM
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	752	psize = get_slice_psize(mm, s_addr);
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	753	BUG_ON(psize == MMU_PAGE_16M);
				754	#endif
				755	if (old_pmd & _PAGE_COMBO)
				756	psize = MMU_PAGE_4K;
				757	else
				758	psize = MMU_PAGE_64K;
				759
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	760	if (!is_kernel_addr(s_addr)) {
				761	ssize = user_segment_size(s_addr);
				762	vsid = get_vsid(mm->context.id, s_addr, ssize);
				763	WARN_ON(vsid == 0);
				764	} else {
				765	vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
				766	ssize = mmu_kernel_ssize;
				767	}
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	768
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	769	if (ppc_md.hugepage_invalidate)
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	770	return ppc_md.hugepage_invalidate(vsid, s_addr,
				771	hpte_slot_array,
				772	psize, ssize);
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	773	/*
				774	* No bluk hpte removal support, invalidate each entry
				775	*/
				776	shift = mmu_psize_defs[psize].shift;
				777	max_hpte_count = HPAGE_PMD_SIZE >> shift;
				778	for (i = 0; i < max_hpte_count; i++) {
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	779	/*
				780	* 8 bits per each hpte entries
				781	* 000\| [ secondary group (one bit) \| hidx (3 bits) \| valid bit]
				782	*/
				783	valid = hpte_valid(hpte_slot_array, i);
				784	if (!valid)
				785	continue;
				786	hidx = hpte_hash_index(hpte_slot_array, i);
				787
				788	/* get the vpn */
				789	addr = s_addr + (i * (1ul << shift));
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	790	vpn = hpt_vpn(addr, vsid, ssize);
				791	hash = hpt_hash(vpn, shift, ssize);
				792	if (hidx & _PTEIDX_SECONDARY)
				793	hash = ~hash;
				794
				795	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
				796	slot += hidx & _PTEIDX_GROUP_IX;
				797	ppc_md.hpte_invalidate(slot, vpn, psize,
				798	MMU_PAGE_16M, ssize, 0);
				799	}
				800	}
				801
				802	static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
				803	{
				804	pmd_val(pmd) \|= pgprot_val(pgprot);
				805	return pmd;
				806	}
				807
				808	pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
				809	{
				810	pmd_t pmd;
				811	/*
				812	* For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
				813	* set. We use this to check THP page at pmd level.
				814	* leaf pte for huge page, bottom two bits != 00
				815	*/
				816	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
				817	pmd_val(pmd) \|= _PAGE_THP_HUGE;
				818	pmd = pmd_set_protbits(pmd, pgprot);
				819	return pmd;
				820	}
				821
				822	pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
				823	{
				824	return pfn_pmd(page_to_pfn(page), pgprot);
				825	}
				826
				827	pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
				828	{
				829
				830	pmd_val(pmd) &= _HPAGE_CHG_MASK;
				831	pmd = pmd_set_protbits(pmd, newprot);
				832	return pmd;
				833	}
				834
				835	/*
				836	* This is called at the end of handling a user page fault, when the
				837	* fault has been handled by updating a HUGE PMD entry in the linux page tables.
				838	* We use it to preload an HPTE into the hash table corresponding to
				839	* the updated linux HUGE PMD entry.
				840	*/
				841	void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
				842	pmd_t *pmd)
				843	{
				844	return;
				845	}
				846
				847	pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				848	unsigned long addr, pmd_t *pmdp)
				849	{
				850	pmd_t old_pmd;
				851	pgtable_t pgtable;
				852	unsigned long old;
				853	pgtable_t *pgtable_slot;
				854
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	855	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	856	old_pmd = __pmd(old);
				857	/*
				858	* We have pmd == none and we are holding page_table_lock.
				859	* So we can safely go and clear the pgtable hash
				860	* index info.
				861	*/
				862	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				863	pgtable = *pgtable_slot;
				864	/*
				865	* Let's zero out old valid and hash index details
				866	* hash fault look at them.
				867	*/
				868	memset(pgtable, 0, PTE_FRAG_SIZE);
				869	return old_pmd;
				870	}
Aneesh Kumar K.V	437d496	2013-06-20 14:30:26 +0530	[diff] [blame]	871
				872	int has_transparent_hugepage(void)
				873	{
				874	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
				875	return 0;
				876	/*
				877	* We support THP only if PMD_SIZE is 16MB.
				878	*/
				879	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
				880	return 0;
				881	/*
				882	* We need to make sure that we support 16MB hugepage in a segement
				883	* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
				884	* of 64K.
				885	*/
				886	/*
				887	* If we have 64K HPTE, we will be using that by default
				888	*/
				889	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
				890	(mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
				891	return 0;
				892	/*
				893	* Ok we only have 4K HPTE
				894	*/
				895	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
				896	return 0;
				897
				898	return 1;
				899	}
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	900	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */