/*
 * This file contains ioremap and related functions for 64-bit machines.
 *
 * Derived from arch/ppc64/mm/init.c
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 * Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *     Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>

#include "mmu_decl.h"

#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>

/* Some sanity checking */
#if TASK_SIZE_USER64 > PGTABLE_RANGE
#error TASK_SIZE_USER64 exceeds pagetable range
#endif

#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif

unsigned long ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PPC_MMU_NOHASH
static __ref void *early_alloc_pgtable(unsigned long size)
{
	void *pt;

	if (init_bootmem_done)
		pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
	else
		pt = __va(memblock_alloc_base(size, size,
					      __pa(MAX_DMA_ADDRESS)));
	memset(pt, 0, size);

	return pt;
}
#endif /* CONFIG_PPC_MMU_NOHASH */

/*
 * map_kernel_page is currently only called by __ioremap.  It adds an
 * entry to the ioremap page table and an entry to the HPT, possibly
 * bolting it.
 */
int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	if (slab_is_available()) {
		pgdp = pgd_offset_k(ea);
		pudp = pud_alloc(&init_mm, pgdp, ea);
		if (!pudp)
			return -ENOMEM;
		pmdp = pmd_alloc(&init_mm, pudp, ea);
		if (!pmdp)
			return -ENOMEM;
		ptep = pte_alloc_kernel(pmdp, ea);
		if (!ptep)
			return -ENOMEM;
		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
						       __pgprot(flags)));
	} else {
#ifdef CONFIG_PPC_MMU_NOHASH
		/*
		 * Warning! This will blow up if bootmem is not initialized,
		 * which our ppc64 code is keen to do.  We'll need to fix it
		 * and/or be more careful.
		 */
		pgdp = pgd_offset_k(ea);
#ifdef PUD_TABLE_SIZE
		if (pgd_none(*pgdp)) {
			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
			BUG_ON(pudp == NULL);
			pgd_populate(&init_mm, pgdp, pudp);
		}
#endif /* PUD_TABLE_SIZE */
		pudp = pud_offset(pgdp, ea);
		if (pud_none(*pudp)) {
			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
			BUG_ON(pmdp == NULL);
			pud_populate(&init_mm, pudp, pmdp);
		}
		pmdp = pmd_offset(pudp, ea);
		if (!pmd_present(*pmdp)) {
			ptep = early_alloc_pgtable(PAGE_SIZE);
			BUG_ON(ptep == NULL);
			pmd_populate_kernel(&init_mm, pmdp, ptep);
		}
		ptep = pte_offset_kernel(pmdp, ea);
		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
						       __pgprot(flags)));
#else /* CONFIG_PPC_MMU_NOHASH */
		/*
		 * If the mm subsystem is not fully up, we cannot create a
		 * linux page table entry for this mapping.  Simply bolt an
		 * entry in the hardware page table.
		 */
		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
				      mmu_io_psize, mmu_kernel_ssize)) {
			printk(KERN_ERR "Failed to do bolted mapping IO "
			       "memory at %016lx !\n", pa);
			return -ENOMEM;
		}
#endif /* !CONFIG_PPC_MMU_NOHASH */
	}

#ifdef CONFIG_PPC_BOOK3E_64
	/*
	 * With hardware tablewalk, a sync is needed to ensure that
	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
	 * mappings, we can't tolerate spurious faults, so make sure
	 * the new PTE will be seen the first time.
	 */
	mb();
#else
	smp_wmb();
#endif
	return 0;
}


/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			    unsigned long flags)
{
	unsigned long i;

	/* Make sure we have the base flags */
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);

	/* Non-cacheable page cannot be coherent */
	if (flags & _PAGE_NO_CACHE)
		flags &= ~_PAGE_COHERENT;

	/* We don't support the 4K PFN hack with ioremap */
	if (flags & _PAGE_4K_PFN)
		return NULL;

	WARN_ON(pa & ~PAGE_MASK);
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	for (i = 0; i < size; i += PAGE_SIZE)
		if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
			return NULL;

	return (void __iomem *)ea;
}
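
/*
 * Illustrative sketch, not part of the original file: __ioremap_at() and
 * __iounmap_at() are the low-level pair for callers that manage the
 * virtual address range themselves (e.g. partial mappings of PCI IO or
 * ISA space).  The names example_va, EXAMPLE_MMIO_PHYS and
 * EXAMPLE_MMIO_SIZE below are hypothetical; all three arguments must be
 * page aligned, as the WARN_ONs above enforce.
 *
 *	void __iomem *regs;
 *
 *	regs = __ioremap_at(EXAMPLE_MMIO_PHYS, example_va, EXAMPLE_MMIO_SIZE,
 *			    _PAGE_NO_CACHE | _PAGE_GUARDED);
 *	if (!regs)
 *		return -ENOMEM;
 *	...
 *	__iounmap_at(example_va, EXAMPLE_MMIO_SIZE);
 */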

/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}

void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				unsigned long flags, void *caller)
{
	phys_addr_t paligned;
	void __iomem *ret;

	/*
	 * Choose an address to map it to.  Once the vmalloc system is
	 * running, we use it.  Before that, we map using addresses going
	 * up from ioremap_bot.  vmalloc will use the addresses from
	 * ioremap_bot through IOREMAP_END.
	 */
	paligned = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - paligned;

	if ((size == 0) || (paligned == 0))
		return NULL;

	if (mem_init_done) {
		struct vm_struct *area;

		area = __get_vm_area_caller(size, VM_IOREMAP,
					    ioremap_bot, IOREMAP_END,
					    caller);
		if (area == NULL)
			return NULL;

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
			vunmap(area->addr);
	} else {
		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}

	if (ret)
		ret += addr & ~PAGE_MASK;
	return ret;
}

void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
			 unsigned long flags)
{
	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}

void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
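
/*
 * Illustrative sketch, not part of the original file: typical driver use
 * of the high-level ioremap()/iounmap() pair.  "res" stands for a struct
 * resource obtained elsewhere (platform or PCI core) and CTRL_REG is a
 * hypothetical register offset.
 *
 *	void __iomem *base = ioremap(res->start, resource_size(res));
 *
 *	if (!base)
 *		return -ENOMEM;
 *	writel(1, base + CTRL_REG);
 *	iounmap(base);
 *
 * ioremap() maps the range cache-inhibited and guarded, which is what
 * MMIO registers normally want; ioremap_wc() and ioremap_prot() below
 * provide other attributes.
 */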

void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = _PAGE_NO_CACHE;
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
			    unsigned long flags)
{
	void *caller = __builtin_return_address(0);

	/* writeable implies dirty for kernel addresses */
	if (flags & _PAGE_RW)
		flags |= _PAGE_DIRTY;

	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
	flags &= ~(_PAGE_USER | _PAGE_EXEC);

#ifdef _PAGE_BAP_SR
	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
	 * which means that we just cleared supervisor access... oops ;-) This
	 * restores it
	 */
	flags |= _PAGE_BAP_SR;
#endif

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}


/*
 * Unmap an IO region and remove it from the vmalloc'd mappings.
 * Access to IO memory should be serialized by the driver.
 */
void __iounmap(volatile void __iomem *token)
{
	void *addr;

	if (!mem_init_done)
		return;

	addr = (void *) ((unsigned long __force)
			 PCI_FIX_ADDR(token) & PAGE_MASK);
	if ((unsigned long)addr < ioremap_bot) {
		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
		       " at 0x%p\n", addr);
		return;
	}
	vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
	if (ppc_md.iounmap)
		ppc_md.iounmap(token);
	else
		__iounmap(token);
}

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);

/*
 * For a hugepage we keep the pfn in the pmd and use the bits below
 * PTE_RPN_SHIFT for flags.
 * For a PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
 */
struct page *pmd_page(pmd_t pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(pmd))
		return pfn_to_page(pmd_pfn(pmd));
#endif
	return virt_to_page(pmd_page_vaddr(pmd));
}

#ifdef CONFIG_PPC_64K_PAGES
static pte_t *get_from_cache(struct mm_struct *mm)
{
	void *pte_frag, *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		pte_frag = ret + PTE_FRAG_SIZE;
		/*
		 * If we have taken up all the fragments, mark the PTE page NULL
		 */
		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
			pte_frag = NULL;
		mm->context.pte_frag = pte_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}

static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
	void *ret = NULL;
	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
				       __GFP_REPEAT | __GFP_ZERO);
	if (!page)
		return NULL;
	if (!kernel && !pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	/*
	 * If we find pgtable_page set, we return
	 * the allocated page with a single fragment
	 * count.
	 */
	if (likely(!mm->context.pte_frag)) {
		atomic_set(&page->_count, PTE_FRAG_NR);
		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pte_t *)ret;
}
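
/*
 * Worked example, assuming the usual 64K base page configuration where
 * PTE_FRAG_SIZE is 4K: one page then holds PAGE_SIZE / PTE_FRAG_SIZE = 16
 * fragments, so __alloc_for_cache() seeds the page count with PTE_FRAG_NR
 * and get_from_cache() hands out successive 4K slices until the cursor
 * wraps to a page boundary, at which point mm->context.pte_frag goes back
 * to NULL and the next allocation grabs a fresh page.
 */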

pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	pte_t *pte;

	pte = get_from_cache(mm);
	if (pte)
		return pte;

	return __alloc_for_cache(mm, kernel);
}

void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);
	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}

#ifdef CONFIG_SMP
static void page_table_free_rcu(void *table)
{
	struct page *page = virt_to_page(table);
	if (put_page_testzero(page)) {
		pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}

void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
	pgf |= shift;
	tlb_remove_table(tlb, (void *)pgf);
}

void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	if (!shift)
		/* PTE page needs special handling */
		page_table_free_rcu(table);
	else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
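
/*
 * Sketch of the encoding used above, under the assumption that
 * MAX_PGTABLE_INDEX_SIZE is a small mask (page table pages are aligned
 * well beyond it): pgtable_free_tlb() stashes the table's index-size
 * "shift" in the low bits of the pointer passed to tlb_remove_table(),
 * and __tlb_remove_table() splits the two apart again:
 *
 *	table = ptr & ~MAX_PGTABLE_INDEX_SIZE;	(original table address)
 *	shift = ptr &  MAX_PGTABLE_INDEX_SIZE;	(0 means a PTE fragment page)
 */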
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	if (!shift) {
		/* PTE page needs special handling */
		struct page *page = virt_to_page(table);
		if (put_page_testzero(page)) {
			pgtable_page_dtor(page);
			free_hot_cold_page(page, 0);
		}
	} else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#endif
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

/*
 * This is called when relaxing access to a hugepage. It's also called in
 * the page fault path when we don't hit any of the major fault cases,
 * ie, a minor update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic
 * code will have handled those two for us; we additionally deal with
 * missing execute permission here on some processors.
 */
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp, pmd_t entry, int dirty)
{
	int changed;
#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp));
	assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
	changed = !pmd_same(*(pmdp), entry);
	if (changed) {
		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
		/*
		 * Since we are not supporting SW TLB systems, we don't
		 * have anything similar to flush_tlb_page_nohash()
		 */
	}
	return changed;
}

unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{
	unsigned long old, tmp;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp));
	assert_spin_locked(&mm->page_table_lock);
#endif

#ifdef PTE_ATOMIC_UPDATES
	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3\n\
		andi.	%1,%0,%6\n\
		bne-	1b \n\
		andc	%1,%0,%4 \n\
		or	%1,%1,%7\n\
		stdcx.	%1,0,%3 \n\
		bne-	1b"
	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
	: "cc" );
#else
	old = pmd_val(*pmdp);
	*pmdp = __pmd((old & ~clr) | set);
#endif
	trace_hugepage_update(addr, old, clr, set);
	if (old & _PAGE_HASHPTE)
		hpte_do_hugepage_flush(mm, addr, pmdp, old);
	return old;
}
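
/*
 * Usage note, derived from the callers later in this file:
 * pmdp_invalidate() passes clr = _PAGE_PRESENT, set = 0 to knock out just
 * the present bit, while pmdp_get_and_clear() passes clr = ~0UL to wipe
 * the whole entry; the returned old value lets callers check
 * _PAGE_HASHPTE and friends afterwards.
 */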

pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
		       pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (pmd_trans_huge(*pmdp)) {
		pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
	} else {
		/*
		 * khugepaged calls this for normal pmd
		 */
		pmd = *pmdp;
		pmd_clear(pmdp);
		/*
		 * Wait for all pending hash_page to finish. This is needed
		 * in case of subpage collapse. When we collapse normal pages
		 * to hugepage, we first clear the pmd, then invalidate all
		 * the PTE entries. The assumption here is that any low level
		 * page fault will see a none pmd and take the slow path that
		 * will wait on mmap_sem. But we could very well be in a
		 * hash_page with local ptep pointer value. Such a hash page
		 * can result in adding new HPTE entries for normal subpages.
		 * That means we could be modifying the page content as we
		 * copy them to a huge page. So wait for parallel hash_page
		 * to finish before invalidating HPTE entries. We can do this
		 * by sending an IPI to all the cpus and executing a dummy
		 * function there.
		 */
		kick_all_cpus_sync();
		/*
		 * Now invalidate the hpte entries in the range
		 * covered by pmd. This makes sure we take a
		 * fault and will find the pmd as none, which will
		 * result in a major fault which takes mmap_sem and
		 * hence waits for collapse to complete. Without this
		 * the __collapse_huge_page_copy can result in copying
		 * the old content.
		 */
		flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
	}
	return pmd;
}

int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long address, pmd_t *pmdp)
{
	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}

/*
 * We currently remove entries from the hashtable regardless of whether
 * the entry was young or dirty. The generic routines only flush if the
 * entry was young or dirty, which is not good enough.
 *
 * We should be more intelligent about this but for the moment we override
 * these functions and force a tlb flush unconditionally.
 */
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}

/*
 * We mark the pmd splitting and invalidate all the hpte
 * entries for this hugepage.
 */
void pmdp_splitting_flush(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp)
{
	unsigned long old, tmp;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp));
	assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif

#ifdef PTE_ATOMIC_UPDATES

	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3\n\
		andi.	%1,%0,%6\n\
		bne-	1b \n\
		ori	%1,%0,%4 \n\
		stdcx.	%1,0,%3 \n\
		bne-	1b"
	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
	: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
	: "cc" );
#else
	old = pmd_val(*pmdp);
	*pmdp = __pmd(old | _PAGE_SPLITTING);
#endif
	/*
	 * If we didn't have the splitting flag set, go and flush the
	 * HPTE entries.
	 */
	trace_hugepage_splitting(address, old);
	if (!(old & _PAGE_SPLITTING)) {
		/* We need to flush the hpte */
		if (old & _PAGE_HASHPTE)
			hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
	}
	/*
	 * This ensures that generic code that relies on IRQ disabling
	 * to prevent a parallel THP split works as expected.
	 */
	kick_all_cpus_sync();
}

/*
 * We want to put the pgtable in the pmd and use the pgtable to track
 * the base page size hptes.
 */
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	pgtable_t *pgtable_slot;
	assert_spin_locked(&mm->page_table_lock);
	/*
	 * we store the pgtable in the second half of PMD
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	*pgtable_slot = pgtable;
	/*
	 * Expose the deposited pgtable to other cpus before we set the
	 * hugepage PTE at the pmd level; the hash fault code looks at the
	 * deposited pgtable to store hash index values.
	 */
	smp_wmb();
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;
	pgtable_t *pgtable_slot;

	assert_spin_locked(&mm->page_table_lock);
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Once we withdraw, mark the entry NULL.
	 */
	*pgtable_slot = NULL;
	/*
	 * We store HPTE information in the deposited PTE fragment.
	 * Zero out the content on withdraw.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	return pgtable;
}

/*
 * Set a new huge pmd. We should not be called for updating
 * an existing pmd entry. That should go via pmd_hugepage_update.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
	assert_spin_locked(&mm->page_table_lock);
	WARN_ON(!pmd_trans_huge(pmd));
#endif
	trace_hugepage_set_pmd(addr, pmd);
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}

void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
}

/*
 * A linux hugepage PMD was changed and the corresponding hash table entries
 * need to be flushed.
 */
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
			    pmd_t *pmdp, unsigned long old_pmd)
{
	int ssize, i;
	unsigned long s_addr;
	int max_hpte_count;
	unsigned int psize, valid;
	unsigned char *hpte_slot_array;
	unsigned long hidx, vpn, vsid, hash, shift, slot;

	/*
	 * Flush all the hptes mapping this hugepage
	 */
	s_addr = addr & HPAGE_PMD_MASK;
	hpte_slot_array = get_hpte_slot_array(pmdp);
	/*
	 * If we try to do a HUGE PTE update after a withdraw is done,
	 * we will find the below NULL. This happens when we do
	 * split_huge_page_pmd.
	 */
	if (!hpte_slot_array)
		return;

	/* get the base page size, vsid and segment size */
#ifdef CONFIG_DEBUG_VM
	psize = get_slice_psize(mm, s_addr);
	BUG_ON(psize == MMU_PAGE_16M);
#endif
	if (old_pmd & _PAGE_COMBO)
		psize = MMU_PAGE_4K;
	else
		psize = MMU_PAGE_64K;

	if (!is_kernel_addr(s_addr)) {
		ssize = user_segment_size(s_addr);
		vsid = get_vsid(mm->context.id, s_addr, ssize);
		WARN_ON(vsid == 0);
	} else {
		vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}

	if (ppc_md.hugepage_invalidate)
		return ppc_md.hugepage_invalidate(vsid, s_addr,
						  hpte_slot_array,
						  psize, ssize);
	/*
	 * No bulk hpte removal support; invalidate each entry
	 */
	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = HPAGE_PMD_SIZE >> shift;
	for (i = 0; i < max_hpte_count; i++) {
		/*
		 * 8 bits per hpte entry:
		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
		 */
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;
		ppc_md.hpte_invalidate(slot, vpn, psize,
				       MMU_PAGE_16M, ssize, 0);
	}
}
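
/*
 * Worked example for the loop above, assuming the 16MB hugepage size that
 * has_transparent_hugepage() below requires: with 64K base-page HPTEs,
 * shift is 16 and max_hpte_count = 16M >> 16 = 256 slot-array entries;
 * with 4K subpages (the _PAGE_COMBO case) shift is 12 and max_hpte_count
 * = 4096, which matches the PTE_FRAG_SIZE worth of one-byte slots in the
 * deposited page table fragment.
 */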

static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
	pmd_val(pmd) |= pgprot_val(pgprot);
	return pmd;
}

pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
	pmd_t pmd;
	/*
	 * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
	 * set. We use this to check for a THP page at the pmd level:
	 * for the leaf pte of a huge page, the bottom two bits are != 00.
	 */
	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
	pmd_val(pmd) |= _PAGE_THP_HUGE;
	pmd = pmd_set_protbits(pmd, pgprot);
	return pmd;
}

pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
	return pfn_pmd(page_to_pfn(page), pgprot);
}

pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	pmd_val(pmd) &= _HPAGE_CHG_MASK;
	pmd = pmd_set_protbits(pmd, newprot);
	return pmd;
}

/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a HUGE PMD entry in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux HUGE PMD entry.
 */
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
			  pmd_t *pmd)
{
	return;
}

pmd_t pmdp_get_and_clear(struct mm_struct *mm,
			 unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	pgtable_t pgtable;
	unsigned long old;
	pgtable_t *pgtable_slot;

	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * We have pmd == none and we are holding page_table_lock.
	 * So we can safely go and clear the pgtable hash
	 * index info.
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Let's zero out the old valid and hash index details;
	 * the hash fault code looks at them.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	return old_pmd;
}

int has_transparent_hugepage(void)
{
	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return 0;
	/*
	 * We support THP only if PMD_SIZE is 16MB.
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
		return 0;
	/*
	 * We need to make sure that we support 16MB hugepage in a segment
	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
	 * of 64K.
	 */
	/*
	 * If we have 64K HPTE, we will be using that by default
	 */
	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
		return 0;
	/*
	 * Ok we only have 4K HPTE
	 */
	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
		return 0;

	return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */