/*
 * Memory Migration functionality - linux/mm/migration.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter <clameter@sgi.com>
 */

#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/swapops.h>

#include "internal.h"

/* The maximum number of pages to take off the LRU for migration */
#define MIGRATE_CHUNK_SIZE 256

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful put it onto
 * the indicated list with elevated page count.
 *
 * Result:
 *  -EBUSY: page not on LRU list
 *  0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page)) {
			ret = 0;
			get_page(page);
			ClearPageLRU(page);
			if (PageActive(page))
				del_page_from_active_list(zone, page);
			else
				del_page_from_inactive_list(zone, page);
			list_add_tail(&page->lru, pagelist);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}

/*
 * migrate_prep() needs to be called after we have compiled the list of pages
 * to be migrated using isolate_lru_page() but before we begin a series of
 * calls to migrate_pages().
 */
int migrate_prep(void)
{
	/* Must have swap device for migration */
	if (nr_swap_pages <= 0)
		return -ENODEV;

	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

static inline void move_to_lru(struct page *page)
{
	list_del(&page->lru);
	if (PageActive(page)) {
		/*
		 * lru_cache_add_active checks that
		 * the PG_active bit is off.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	} else {
		lru_cache_add(page);
	}
	put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;
	int count = 0;

	list_for_each_entry_safe(page, page2, l, lru) {
		move_to_lru(page);
		count++;
	}
	return count;
}

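/*
 * Illustrative sketch only; nothing below is invoked by this file. It
 * shows the calling sequence the migrate_prep() comment above describes,
 * with "candidate page" standing in for however a caller collects pages
 * and "to" being either a list of preallocated target pages or NULL to
 * swap the pages out instead:
 *
 *	LIST_HEAD(pagelist);
 *	LIST_HEAD(moved);
 *	LIST_HEAD(failed);
 *
 *	if (migrate_prep() < 0)
 *		return;			(no swap space available)
 *	for each candidate page:
 *		isolate_lru_page(page, &pagelist);
 *	nr_left = migrate_pages(&pagelist, to, &moved, &failed);
 *	putback_lru_pages(&moved);
 *
 * Retryable pages remain on "pagelist", permanent failures end up on
 * "failed", and successfully migrated (old) pages are released via
 * putback_lru_pages(), mirroring what migrate_pages_to() below does.
 */
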
/*
 * swapout a single page
 * page is locked upon entry, unlocked on exit
 */
static int swap_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (page_mapped(page) && mapping)
		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
			goto unlock_retry;

	if (PageDirty(page)) {
		/* Page is dirty, try to write it out here */
		switch (pageout(page, mapping)) {
		case PAGE_KEEP:
		case PAGE_ACTIVATE:
			goto unlock_retry;

		case PAGE_SUCCESS:
			goto retry;

		case PAGE_CLEAN:
			; /* try to free the page below */
		}
	}

	if (PagePrivate(page)) {
		if (!try_to_release_page(page, GFP_KERNEL) ||
		    (!mapping && page_count(page) == 1))
			goto unlock_retry;
	}

	if (remove_mapping(mapping, page)) {
		/* Success */
		unlock_page(page);
		return 0;
	}

unlock_retry:
	unlock_page(page);

retry:
	return -EAGAIN;
}

/*
 * Remove references for a page and establish the new page with the correct
 * basic settings to be able to stop accesses to the page.
 */
static int migrate_page_remove_references(struct page *newpage,
				struct page *page, int nr_refs)
{
	struct address_space *mapping = page_mapping(page);
	struct page **radix_pointer;

	/*
	 * Avoid doing any of the following work if the page count
	 * indicates that the page is in use or truncate has removed
	 * the page.
	 */
	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
		return -EAGAIN;

	/*
	 * Establish swap ptes for anonymous pages or destroy pte
	 * maps for files.
	 *
	 * In order to reestablish file backed mappings the fault handlers
	 * will take the radix tree_lock which may then be used to stop
	 * processes from accessing this page until the new page is ready.
	 *
	 * A process accessing via a swap pte (an anonymous page) will take a
	 * page_lock on the old page which will block the process until the
	 * migration attempt is complete. At that time the PageSwapCache bit
	 * will be examined. If the page was migrated then the PageSwapCache
	 * bit will be clear and the operation to retrieve the page will be
	 * retried which will find the new page in the radix tree. Then a new
	 * direct mapping may be generated based on the radix tree contents.
	 *
	 * If the page was not migrated then the PageSwapCache bit
	 * is still set and the operation may continue.
	 */
	if (try_to_unmap(page, 1) == SWAP_FAIL)
		/* A vma has VM_LOCKED set -> permanent failure */
		return -EPERM;

	/*
	 * Give up if we were unable to remove all mappings.
	 */
	if (page_mapcount(page))
		return -EAGAIN;

	write_lock_irq(&mapping->tree_lock);

	radix_pointer = (struct page **)radix_tree_lookup_slot(
						&mapping->page_tree,
						page_index(page));

	if (!page_mapping(page) || page_count(page) != nr_refs ||
			*radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 *
	 * Certain minimal information about a page must be available
	 * in order for other subsystems to properly handle the page if they
	 * find it through the radix tree update before we are finished
	 * copying the page.
	 */
	get_page(newpage);
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	*radix_pointer = newpage;
	__put_page(page);
	write_unlock_irq(&mapping->tree_lock);

	return 0;
}

/*
 * Copy the page to its new location
 */
static void migrate_page_copy(struct page *newpage, struct page *page)
{
	copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (PageActive(page))
		SetPageActive(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		set_page_dirty(newpage);
	}

	ClearPageSwapCache(page);
	ClearPageActive(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page->mapping = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/* Always fail migration. Used for mappings that are not movable */
int fail_migrate_page(struct page *newpage, struct page *page)
{
	return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct page *newpage, struct page *page)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_remove_references(newpage, page, 2);

	if (rc)
		return rc;

	migrate_page_copy(newpage, page);

	/*
	 * Remove auxiliary swap entries and replace
	 * them with real ptes.
	 *
	 * Note that a real pte entry will allow processes that are not
	 * waiting on the page lock to use the new page via the page tables
	 * before the new page is unlocked.
	 */
	remove_from_swap(newpage);
	return 0;
}
EXPORT_SYMBOL(migrate_page);

/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct page *newpage, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct buffer_head *bh, *head;
	int rc;

	if (!mapping)
		return -EAGAIN;

	if (!page_has_buffers(page))
		return migrate_page(newpage, page);

	head = page_buffers(page);

	rc = migrate_page_remove_references(newpage, page, 3);

	if (rc)
		return rc;

	bh = head;
	do {
		get_bh(bh);
		lock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);

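/*
 * Illustrative sketch only: a filesystem that wants its pages to be
 * movable is expected to hook one of the exported helpers above into its
 * address_space_operations. "example_aops" and the example_* methods are
 * hypothetical names, not structures defined anywhere in the tree:
 *
 *	static struct address_space_operations example_aops = {
 *		.readpage	= example_readpage,
 *		.writepage	= example_writepage,
 *		.migratepage	= buffer_migrate_page,
 *	};
 *
 * Mappings without buffer heads can use migrate_page() directly, and
 * mappings that must never be migrated can set fail_migrate_page() so
 * that migration of their pages fails permanently with -EIO.
 */
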
/*
 * migrate_pages
 *
 * Two lists are passed to this function. The first list
 * contains the pages isolated from the LRU to be migrated.
 * The second list contains new pages that the isolated pages
 * can be moved to. If the second list is NULL then all
 * pages are swapped out.
 *
 * The function returns after 10 passes, when the "to" list
 * has become empty or when no retryable pages exist anymore.
 *
 * Return: Number of pages not migrated (permanent failures plus
 * pages still retryable when the function gave up).
 */
int migrate_pages(struct list_head *from, struct list_head *to,
		  struct list_head *moved, struct list_head *failed)
{
	int retry;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

redo:
	retry = 0;

	list_for_each_entry_safe(page, page2, from, lru) {
		struct page *newpage = NULL;
		struct address_space *mapping;

		cond_resched();

		rc = 0;
		if (page_count(page) == 1)
			/* page was freed from under us. So we are done. */
			goto next;

		if (to && list_empty(to))
			break;

		/*
		 * Skip locked pages during the first two passes to give the
		 * functions holding the lock time to release the page. Later
		 * we use lock_page() to have a higher chance of acquiring
		 * the lock.
		 */
		rc = -EAGAIN;
		if (pass > 2)
			lock_page(page);
		else
			if (TestSetPageLocked(page))
				goto next;

		/*
		 * Only wait on writeback if we have already done a pass where
		 * we may have triggered writeouts for lots of pages.
		 */
		if (pass > 0) {
			wait_on_page_writeback(page);
		} else {
			if (PageWriteback(page))
				goto unlock_page;
		}

		/*
		 * Anonymous pages must have swap cache references otherwise
		 * the information contained in the page maps cannot be
		 * preserved.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!add_to_swap(page, GFP_KERNEL)) {
				rc = -ENOMEM;
				goto unlock_page;
			}
		}

		if (!to) {
			rc = swap_page(page);
			goto next;
		}

		newpage = lru_to_page(to);
		lock_page(newpage);

		/*
		 * Pages are properly locked and writeback is complete.
		 * Try to migrate the page.
		 */
		mapping = page_mapping(page);
		if (!mapping)
			goto unlock_both;

		if (mapping->a_ops->migratepage) {
			/*
			 * Most pages have a mapping and most filesystems
			 * should provide a migration function. Anonymous
			 * pages are part of swap space which also has its
			 * own migration function. This is the most common
			 * path for page migration.
			 */
			rc = mapping->a_ops->migratepage(newpage, page);
			goto unlock_both;
		}

		/* Make sure the dirty bit is up to date */
		if (try_to_unmap(page, 1) == SWAP_FAIL) {
			rc = -EPERM;
			goto unlock_both;
		}

		if (page_mapcount(page)) {
			rc = -EAGAIN;
			goto unlock_both;
		}

		/*
		 * Default handling if a filesystem does not provide
		 * a migration function. We can only migrate clean
		 * pages so try to write out any dirty pages first.
		 */
		if (PageDirty(page)) {
			switch (pageout(page, mapping)) {
			case PAGE_KEEP:
			case PAGE_ACTIVATE:
				goto unlock_both;

			case PAGE_SUCCESS:
				unlock_page(newpage);
				goto next;

			case PAGE_CLEAN:
				; /* try to migrate the page below */
			}
		}

		/*
		 * Buffers are managed in a filesystem specific way.
		 * We must have no buffers or drop them.
		 */
		if (!page_has_buffers(page) ||
		    try_to_release_page(page, GFP_KERNEL)) {
			rc = migrate_page(newpage, page);
			goto unlock_both;
		}

		/*
		 * On early passes with mapped pages simply
		 * retry. There may be a lock held for some
		 * buffers that may go away. Later
		 * swap them out.
		 */
		if (pass > 4) {
			/*
			 * Persistently unable to drop buffers..... As a
			 * measure of last resort we fall back to
			 * swap_page().
			 */
			unlock_page(newpage);
			newpage = NULL;
			rc = swap_page(page);
			goto next;
		}

unlock_both:
		unlock_page(newpage);

unlock_page:
		unlock_page(page);

next:
		if (rc == -EAGAIN) {
			retry++;
		} else if (rc) {
			/* Permanent failure */
			list_move(&page->lru, failed);
			nr_failed++;
		} else {
			if (newpage) {
				/* Successful migration. Return page to LRU */
				move_to_lru(newpage);
			}
			list_move(&page->lru, moved);
		}
	}
	if (retry && pass++ < 10)
		goto redo;

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return nr_failed + retry;
}

/*
 * Migrate the list 'pagelist' of pages to a certain destination.
 *
 * Specify the destination with either a non-NULL vma or a destination
 * node dest >= 0.
 * Returns the number of pages not migrated or an error code.
 */
int migrate_pages_to(struct list_head *pagelist,
			struct vm_area_struct *vma, int dest)
{
	LIST_HEAD(newlist);
	LIST_HEAD(moved);
	LIST_HEAD(failed);
	int err = 0;
	unsigned long offset = 0;
	int nr_pages;
	struct page *page;
	struct list_head *p;

redo:
	nr_pages = 0;
	list_for_each(p, pagelist) {
		if (vma) {
			/*
			 * The address passed to alloc_page_vma is used to
			 * generate the proper interleave behavior. We fake
			 * the address here by an increasing offset in order
			 * to get the proper distribution of pages.
			 *
			 * No decision has been made as to which page
			 * a certain old page is moved to so we cannot
			 * specify the correct address.
			 */
			page = alloc_page_vma(GFP_HIGHUSER, vma,
					offset + vma->vm_start);
			offset += PAGE_SIZE;
		}
		else
			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);

		if (!page) {
			err = -ENOMEM;
			goto out;
		}
		list_add_tail(&page->lru, &newlist);
		nr_pages++;
		if (nr_pages > MIGRATE_CHUNK_SIZE)
			break;
	}
	err = migrate_pages(pagelist, &newlist, &moved, &failed);

	putback_lru_pages(&moved);	/* Call release pages instead ?? */

	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
		goto redo;
out:
	/* Return leftover allocated pages */
	while (!list_empty(&newlist)) {
		page = list_entry(newlist.next, struct page, lru);
		list_del(&page->lru);
		__free_page(page);
	}
	list_splice(&failed, pagelist);
	if (err < 0)
		return err;

	/* Calculate number of leftover pages */
	nr_pages = 0;
	list_for_each(p, pagelist)
		nr_pages++;
	return nr_pages;
}
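
/*
 * Illustrative sketch only: a caller moving a set of isolated pages to a
 * specific node might do the following ("target_node" is hypothetical):
 *
 *	err = migrate_pages_to(&pagelist, NULL, target_node);
 *	putback_lru_pages(&pagelist);
 *
 * A negative "err" signals an allocation failure (e.g. -ENOMEM); a
 * positive value is the number of pages that could not be migrated.
 * Either way the unmigrated pages have been left on or spliced back
 * onto "pagelist", so returning that list to the LRU is all the
 * cleanup the caller needs.
 */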