Blame - arch/s390/mm/gup.c - kernel/msm-4.9

blob: be1e2ed6405d34bfc3f9ed525f97df5c974c0522 [file] [log] [blame]

/*
 * Lockless get_user_pages_fast for s390
 *
 * Copyright IBM Corp. 2010
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
				7	#include <linux/sched.h>
				8	#include <linux/mm.h>
				9	#include <linux/hugetlb.h>
				10	#include <linux/vmstat.h>
				11	#include <linux/pagemap.h>
				12	#include <linux/rwsem.h>
				13	#include <asm/pgtable.h>
				14
				15	/*
				16	* The performance critical leaf functions are made noinline otherwise gcc
				17	* inlines everything into a single function which results in too much
				18	* register pressure.
				19	*/
				20	static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
				21	unsigned long end, int write, struct page *pages, int nr)
				22	{
Gerald Schaefer	fc897c9	2016-03-17 15:00:04 +0100	[diff] [blame]	23	struct page head, page;
Martin Schwidefsky	25591b0	2010-11-10 10:05:51 +0100	[diff] [blame]	24	unsigned long mask;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	25	pte_t *ptep, pte;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	26
Martin Schwidefsky	e509861	2013-07-23 20:57:57 +0200	[diff] [blame]	27	mask = (write ? _PAGE_PROTECT : 0) \| _PAGE_INVALID \| _PAGE_SPECIAL;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	28
				29	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
				30	do {
				31	pte = *ptep;
				32	barrier();
Gerald Schaefer	ecf46ab	2015-05-29 15:34:51 +0200	[diff] [blame]	33	/* Similar to the PMD case, NUMA hinting must take slow path */
				34	if (pte_protnone(pte))
				35	return 0;
Martin Schwidefsky	25591b0	2010-11-10 10:05:51 +0100	[diff] [blame]	36	if ((pte_val(pte) & mask) != 0)
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	37	return 0;
				38	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
				39	page = pte_page(pte);
Gerald Schaefer	fc897c9	2016-03-17 15:00:04 +0100	[diff] [blame]	40	head = compound_head(page);
Vlastimil Babka	5224c40	2019-11-29 10:03:49 +0100	[diff] [blame]	41	if (WARN_ON_ONCE(page_ref_count(head) < 0)
				42	\|\| !page_cache_get_speculative(head))
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	43	return 0;
				44	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
Gerald Schaefer	fc897c9	2016-03-17 15:00:04 +0100	[diff] [blame]	45	put_page(head);
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	46	return 0;
				47	}
Gerald Schaefer	fc897c9	2016-03-17 15:00:04 +0100	[diff] [blame]	48	VM_BUG_ON_PAGE(compound_head(page) != head, page);
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	49	pages[*nr] = page;
				50	(*nr)++;
				51
				52	} while (ptep++, addr += PAGE_SIZE, addr != end);
				53
				54	return 1;
				55	}
				56
				57	static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
				58	unsigned long end, int write, struct page *pages, int nr)
				59	{
Kirill A. Shutemov	ddc58f2	2016-01-15 16:52:56 -0800	[diff] [blame]	60	struct page head, page;
Gerald Schaefer	22338c5	2017-09-18 16:51:51 +0200	[diff] [blame]	61	unsigned long mask;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	62	int refs;
				63
Gerald Schaefer	22338c5	2017-09-18 16:51:51 +0200	[diff] [blame]	64	mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) \| _SEGMENT_ENTRY_INVALID;
				65	if ((pmd_val(pmd) & mask) != 0)
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	66	return 0;
				67	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
				68
				69	refs = 0;
				70	head = pmd_page(pmd);
				71	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
				72	do {
				73	VM_BUG_ON(compound_head(page) != head);
				74	pages[*nr] = page;
				75	(*nr)++;
				76	page++;
				77	refs++;
				78	} while (addr += PAGE_SIZE, addr != end);
				79
Vlastimil Babka	5224c40	2019-11-29 10:03:49 +0100	[diff] [blame]	80	if (WARN_ON_ONCE(page_ref_count(head) < 0)
				81	\|\| !page_cache_add_speculative(head, refs)) {
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	82	*nr -= refs;
				83	return 0;
				84	}
				85
				86	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
				87	*nr -= refs;
				88	while (refs--)
				89	put_page(head);
Andrea Arcangeli	0693bc9	2011-11-02 13:37:28 -0700	[diff] [blame]	90	return 0;
				91	}
				92
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	93	return 1;
				94	}
				95
				96
				97	static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
				98	unsigned long end, int write, struct page *pages, int nr)
				99	{
				100	unsigned long next;
				101	pmd_t *pmdp, pmd;
				102
				103	pmdp = (pmd_t *) pudp;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	104	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
				105	pmdp = (pmd_t *) pud_deref(pud);
				106	pmdp += pmd_index(addr);
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	107	do {
				108	pmd = *pmdp;
				109	barrier();
				110	next = pmd_addr_end(addr, end);
Kirill A. Shutemov	fecffad	2016-01-15 16:53:24 -0800	[diff] [blame]	111	if (pmd_none(pmd))
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	112	return 0;
Gerald Schaefer	156152f	2012-10-25 17:24:12 +0200	[diff] [blame]	113	if (unlikely(pmd_large(pmd))) {
Gerald Schaefer	ecf46ab	2015-05-29 15:34:51 +0200	[diff] [blame]	114	/*
				115	* NUMA hinting faults need to be handled in the GUP
				116	* slowpath for accounting purposes and so that they
				117	* can be serialised against THP migration.
				118	*/
				119	if (pmd_protnone(pmd))
				120	return 0;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	121	if (!gup_huge_pmd(pmdp, pmd, addr, next,
				122	write, pages, nr))
				123	return 0;
				124	} else if (!gup_pte_range(pmdp, pmd, addr, next,
				125	write, pages, nr))
				126	return 0;
				127	} while (pmdp++, addr = next, addr != end);
				128
				129	return 1;
				130	}
				131
Gerald Schaefer	d08de8e	2016-07-04 14:47:01 +0200	[diff] [blame]	132	static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
				133	unsigned long end, int write, struct page *pages, int nr)
				134	{
				135	struct page head, page;
				136	unsigned long mask;
				137	int refs;
				138
				139	mask = (write ? _REGION_ENTRY_PROTECT : 0) \| _REGION_ENTRY_INVALID;
				140	if ((pud_val(pud) & mask) != 0)
				141	return 0;
				142	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
				143
				144	refs = 0;
				145	head = pud_page(pud);
				146	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
				147	do {
				148	VM_BUG_ON_PAGE(compound_head(page) != head, page);
				149	pages[*nr] = page;
				150	(*nr)++;
				151	page++;
				152	refs++;
				153	} while (addr += PAGE_SIZE, addr != end);
				154
Vlastimil Babka	5224c40	2019-11-29 10:03:49 +0100	[diff] [blame]	155	if (WARN_ON_ONCE(page_ref_count(head) < 0)
				156	\|\| !page_cache_add_speculative(head, refs)) {
Gerald Schaefer	d08de8e	2016-07-04 14:47:01 +0200	[diff] [blame]	157	*nr -= refs;
				158	return 0;
				159	}
				160
				161	if (unlikely(pud_val(pud) != pud_val(*pudp))) {
				162	*nr -= refs;
				163	while (refs--)
				164	put_page(head);
				165	return 0;
				166	}
				167
				168	return 1;
				169	}
				170
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	171	static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
				172	unsigned long end, int write, struct page *pages, int nr)
				173	{
				174	unsigned long next;
				175	pud_t *pudp, pud;
				176
				177	pudp = (pud_t *) pgdp;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	178	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
				179	pudp = (pud_t *) pgd_deref(pgd);
				180	pudp += pud_index(addr);
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	181	do {
				182	pud = *pudp;
				183	barrier();
				184	next = pud_addr_end(addr, end);
				185	if (pud_none(pud))
				186	return 0;
Gerald Schaefer	d08de8e	2016-07-04 14:47:01 +0200	[diff] [blame]	187	if (unlikely(pud_large(pud))) {
				188	if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
				189	nr))
				190	return 0;
				191	} else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
				192	nr))
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	193	return 0;
				194	} while (pudp++, addr = next, addr != end);
				195
				196	return 1;
				197	}
				198
Gerald Schaefer	34cda99	2012-09-04 15:37:55 +0200	[diff] [blame]	199	/*
				200	* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
				201	* back to the regular GUP.
				202	*/
				203	int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
				204	struct page **pages)
				205	{
				206	struct mm_struct *mm = current->mm;
				207	unsigned long addr, len, end;
				208	unsigned long next, flags;
				209	pgd_t *pgdp, pgd;
				210	int nr = 0;
				211
				212	start &= PAGE_MASK;
				213	addr = start;
				214	len = (unsigned long) nr_pages << PAGE_SHIFT;
				215	end = start + len;
Heiko Carstens	eb0bf92	2013-10-08 09:29:09 +0200	[diff] [blame]	216	if ((end <= start) \|\| (end > TASK_SIZE))
Gerald Schaefer	34cda99	2012-09-04 15:37:55 +0200	[diff] [blame]	217	return 0;
Heiko Carstens	01997bb	2013-10-07 16:14:50 +0200	[diff] [blame]	218	/*
				219	* local_irq_save() doesn't prevent pagetable teardown, but does
				220	* prevent the pagetables from being freed on s390.
				221	*
				222	* So long as we atomically load page table pointers versus teardown,
				223	* we can follow the address down to the the page and take a ref on it.
				224	*/
Gerald Schaefer	34cda99	2012-09-04 15:37:55 +0200	[diff] [blame]	225	local_irq_save(flags);
				226	pgdp = pgd_offset(mm, addr);
				227	do {
				228	pgd = *pgdp;
				229	barrier();
				230	next = pgd_addr_end(addr, end);
				231	if (pgd_none(pgd))
				232	break;
				233	if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
				234	break;
				235	} while (pgdp++, addr = next, addr != end);
				236	local_irq_restore(flags);
				237
				238	return nr;
				239	}
				240
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	241	/**
				242	* get_user_pages_fast() - pin user pages in memory
				243	* @start: starting user address
				244	* @nr_pages: number of pages from start to pin
				245	* @write: whether pages will be written to
				246	* @pages: array that receives pointers to the pages pinned.
				247	* Should be at least nr_pages long.
				248	*
				249	* Attempt to pin user pages in memory without taking mm->mmap_sem.
				250	* If not successful, it will fall back to taking the lock and
				251	* calling get_user_pages().
				252	*
				253	* Returns number of pages pinned. This may be fewer than the number
				254	* requested. If nr_pages is 0 or negative, returns 0. If no pages
				255	* were pinned, returns -errno.
				256	*/
				257	int get_user_pages_fast(unsigned long start, int nr_pages, int write,
				258	struct page **pages)
				259	{
Heiko Carstens	01997bb	2013-10-07 16:14:50 +0200	[diff] [blame]	260	int nr, ret;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	261
David Hildenbrand	40612351	2015-10-15 10:47:18 +0200	[diff] [blame]	262	might_sleep();
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	263	start &= PAGE_MASK;
Linus Torvalds	4573107	2022-01-24 14:41:50 +0100	[diff] [blame]	264	/*
				265	* The FAST_GUP case requires FOLL_WRITE even for pure reads,
				266	* because get_user_pages() may need to cause an early COW in
				267	* order to avoid confusing the normal COW routines. So only
				268	* targets that are already writable are safe to do by just
				269	* looking at the page tables.
				270	*/
				271	nr = __get_user_pages_fast(start, nr_pages, 1, pages);
Heiko Carstens	01997bb	2013-10-07 16:14:50 +0200	[diff] [blame]	272	if (nr == nr_pages)
				273	return nr;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	274
Heiko Carstens	01997bb	2013-10-07 16:14:50 +0200	[diff] [blame]	275	/* Try to get the remaining pages with get_user_pages */
				276	start += nr << PAGE_SHIFT;
				277	pages += nr;
Lorenzo Stoakes	c164154	2016-10-13 01:20:13 +0100	[diff] [blame]	278	ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
				279	write ? FOLL_WRITE : 0);
Heiko Carstens	01997bb	2013-10-07 16:14:50 +0200	[diff] [blame]	280	/* Have to be a bit careful with return values */
				281	if (nr > 0)
				282	ret = (ret < 0) ? nr : ret + nr;
				283	return ret;
Martin Schwidefsky	8021714	2010-10-25 16:10:11 +0200	[diff] [blame]	284	}