/*
 * Memory Migration functionality - linux/mm/migration.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter <clameter@sgi.com>
 */

#include <linux/migrate.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/swapops.h>

#include "internal.h"

/* The maximum number of pages to take off the LRU for migration */
#define MIGRATE_CHUNK_SIZE 256

#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))

/*
 * Isolate one page from the LRU lists. If successful put it onto
 * the indicated list with elevated page count.
 *
 * Result:
 *  -EBUSY: page not on LRU list
 *  0: page removed from LRU list and added to the specified list.
 */
int isolate_lru_page(struct page *page, struct list_head *pagelist)
{
	int ret = -EBUSY;

	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);

		spin_lock_irq(&zone->lru_lock);
		if (PageLRU(page)) {
			ret = 0;
			get_page(page);
			ClearPageLRU(page);
			if (PageActive(page))
				del_page_from_active_list(zone, page);
			else
				del_page_from_inactive_list(zone, page);
			list_add_tail(&page->lru, pagelist);
		}
		spin_unlock_irq(&zone->lru_lock);
	}
	return ret;
}

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(): it drains the per-cpu LRU pagevecs
 * so that the pages can actually be isolated.
 */
int migrate_prep(void)
{
	/* Must have swap device for migration */
	if (nr_swap_pages <= 0)
		return -ENODEV;

	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

static inline void move_to_lru(struct page *page)
{
	list_del(&page->lru);
	if (PageActive(page)) {
		/*
		 * lru_cache_add_active checks that
		 * the PG_active bit is off.
		 */
		ClearPageActive(page);
		lru_cache_add_active(page);
	} else {
		lru_cache_add(page);
	}
	put_page(page);
}

/*
 * Add isolated pages on the list back to the LRU.
 *
 * returns the number of pages put back.
 */
int putback_lru_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;
	int count = 0;

	list_for_each_entry_safe(page, page2, l, lru) {
		move_to_lru(page);
		count++;
	}
	return count;
}
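
/*
 * Minimal usage sketch (illustrative only, kept out of the build): every
 * page taken off the LRU with isolate_lru_page() carries an extra
 * reference and must eventually be returned via putback_lru_pages(),
 * typically after migrate_pages() has sorted it onto the 'moved' or
 * 'failed' list. The helper name below is hypothetical.
 */
#if 0
static void example_isolate_and_putback(struct page *page)
{
	LIST_HEAD(pagelist);

	/* On success the page is off the LRU and on 'pagelist' */
	if (isolate_lru_page(page, &pagelist) == 0) {
		/* ... a real caller would run migrate_pages() here ... */

		/* Return the remaining pages to the LRU, dropping the ref */
		putback_lru_pages(&pagelist);
	}
}
#endif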

/*
 * swapout a single page
 * page is locked upon entry, unlocked on exit
 */
static int swap_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (page_mapped(page) && mapping)
		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
			goto unlock_retry;

	if (PageDirty(page)) {
		/* Page is dirty, try to write it out here */
		switch (pageout(page, mapping)) {
		case PAGE_KEEP:
		case PAGE_ACTIVATE:
			goto unlock_retry;

		case PAGE_SUCCESS:
			goto retry;

		case PAGE_CLEAN:
			; /* try to free the page below */
		}
	}

	if (PagePrivate(page)) {
		if (!try_to_release_page(page, GFP_KERNEL) ||
		    (!mapping && page_count(page) == 1))
			goto unlock_retry;
	}

	if (remove_mapping(mapping, page)) {
		/* Success */
		unlock_page(page);
		return 0;
	}

unlock_retry:
	unlock_page(page);

retry:
	return -EAGAIN;
}

/*
 * Remove references for a page and establish the new page with the correct
 * basic settings to be able to stop accesses to the page.
 *
 * The number of remaining references must be:
 *	1 for anonymous pages without a mapping
 *	2 for pages with a mapping
 *	3 for pages with a mapping and PagePrivate set.
 */
static int migrate_page_remove_references(struct page *newpage,
				struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct page **radix_pointer;

	if (!mapping)
		return -EAGAIN;

	/*
	 * Establish swap ptes for anonymous pages or destroy pte
	 * maps for files.
	 *
	 * In order to reestablish file backed mappings the fault handlers
	 * will take the radix tree_lock which may then be used to stop
	 * processes from accessing this page until the new page is ready.
	 *
	 * A process accessing via a swap pte (an anonymous page) will take a
	 * page_lock on the old page which will block the process until the
	 * migration attempt is complete. At that time the PageSwapCache bit
	 * will be examined. If the page was migrated then the PageSwapCache
	 * bit will be clear and the operation to retrieve the page will be
	 * retried which will find the new page in the radix tree. Then a new
	 * direct mapping may be generated based on the radix tree contents.
	 *
	 * If the page was not migrated then the PageSwapCache bit
	 * is still set and the operation may continue.
	 */
	if (try_to_unmap(page, 1) == SWAP_FAIL)
		/* A vma has VM_LOCKED set -> permanent failure */
		return -EPERM;

	/*
	 * Give up if we were unable to remove all mappings.
	 */
	if (page_mapcount(page))
		return -EAGAIN;

	write_lock_irq(&mapping->tree_lock);

	radix_pointer = (struct page **)radix_tree_lookup_slot(
						&mapping->page_tree,
						page_index(page));

	if (!page_mapping(page) ||
	    page_count(page) != 2 + !!PagePrivate(page) ||
	    *radix_pointer != page) {
		write_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page.
	 *
	 * Certain minimal information about a page must be available
	 * in order for other subsystems to properly handle the page if they
	 * find it through the radix tree update before we are finished
	 * copying the page.
	 */
	get_page(newpage);
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	if (PageSwapCache(page)) {
		SetPageSwapCache(newpage);
		set_page_private(newpage, page_private(page));
	}

	*radix_pointer = newpage;
	__put_page(page);
	write_unlock_irq(&mapping->tree_lock);

	return 0;
}

/*
 * Copy the page to its new location
 */
static void migrate_page_copy(struct page *newpage, struct page *page)
{
	copy_highpage(newpage, page);

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (PageActive(page))
		SetPageActive(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	if (PageDirty(page)) {
		clear_page_dirty_for_io(page);
		set_page_dirty(newpage);
	}

	ClearPageSwapCache(page);
	ClearPageActive(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page->mapping = NULL;

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);
}

/************************************************************
 *                    Migration functions
 ***********************************************************/

/* Always fail migration. Used for mappings that are not movable */
int fail_migrate_page(struct page *newpage, struct page *page)
{
	return -EIO;
}
EXPORT_SYMBOL(fail_migrate_page);

/*
 * Common logic to directly migrate a single page suitable for
 * pages that do not use PagePrivate.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct page *newpage, struct page *page)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_remove_references(newpage, page);

	if (rc)
		return rc;

	migrate_page_copy(newpage, page);

	/*
	 * Remove auxiliary swap entries and replace
	 * them with real ptes.
	 *
	 * Note that a real pte entry will allow processes that are not
	 * waiting on the page lock to use the new page via the page tables
	 * before the new page is unlocked.
	 */
	remove_from_swap(newpage);
	return 0;
}
EXPORT_SYMBOL(migrate_page);

/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */
int buffer_migrate_page(struct page *newpage, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct buffer_head *bh, *head;
	int rc;

	if (!mapping)
		return -EAGAIN;

	if (!page_has_buffers(page))
		return migrate_page(newpage, page);

	head = page_buffers(page);

	rc = migrate_page_remove_references(newpage, page);

	if (rc)
		return rc;

	bh = head;
	do {
		get_bh(bh);
		lock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	ClearPagePrivate(page);
	set_page_private(newpage, page_private(page));
	set_page_private(page, 0);
	put_page(page);
	get_page(newpage);

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	SetPagePrivate(newpage);

	migrate_page_copy(newpage, page);

	bh = head;
	do {
		unlock_buffer(bh);
		put_bh(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return 0;
}
EXPORT_SYMBOL(buffer_migrate_page);
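
/*
 * Sketch of how a filesystem might wire up page migration in its
 * address_space_operations (hypothetical example_aops; the readpage and
 * writepage entries are placeholders, not real functions). A mapping
 * whose pages must never move would use fail_migrate_page() instead.
 */
#if 0
static struct address_space_operations example_aops = {
	.readpage	= example_readpage,	/* placeholder */
	.writepage	= example_writepage,	/* placeholder */
	.migratepage	= buffer_migrate_page,	/* buffer_head based pages */
};
#endif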

/*
 * migrate_pages
 *
 * Two lists are passed to this function. The first list
 * contains the pages isolated from the LRU to be migrated.
 * The second list contains new pages that the pages isolated
 * can be moved to. If the second list is NULL then all
 * pages are swapped out.
 *
 * The function returns after 10 attempts or if no pages are movable
 * any more, either because the 'to' list has become empty or because
 * no retryable pages are left.
 *
 * Return: Number of pages not migrated when "to" ran empty.
 */
int migrate_pages(struct list_head *from, struct list_head *to,
		  struct list_head *moved, struct list_head *failed)
{
	int retry;
	int nr_failed = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

redo:
	retry = 0;

	list_for_each_entry_safe(page, page2, from, lru) {
		struct page *newpage = NULL;
		struct address_space *mapping;

		cond_resched();

		rc = 0;
		if (page_count(page) == 1)
			/* page was freed from under us. So we are done. */
			goto next;

		if (to && list_empty(to))
			break;

		/*
		 * Skip locked pages during the first two passes to give the
		 * functions holding the lock time to release the page. Later we
		 * use lock_page() to have a higher chance of acquiring the
		 * lock.
		 */
		rc = -EAGAIN;
		if (pass > 2)
			lock_page(page);
		else
			if (TestSetPageLocked(page))
				goto next;

		/*
		 * Only wait on writeback if we have already done a pass where
		 * we may have triggered writeouts for lots of pages.
		 */
		if (pass > 0) {
			wait_on_page_writeback(page);
		} else {
			if (PageWriteback(page))
				goto unlock_page;
		}

		/*
		 * Anonymous pages must have swap cache references otherwise
		 * the information contained in the page maps cannot be
		 * preserved.
		 */
		if (PageAnon(page) && !PageSwapCache(page)) {
			if (!add_to_swap(page, GFP_KERNEL)) {
				rc = -ENOMEM;
				goto unlock_page;
			}
		}

		if (!to) {
			rc = swap_page(page);
			goto next;
		}

		newpage = lru_to_page(to);
		lock_page(newpage);

		/*
		 * Pages are properly locked and writeback is complete.
		 * Try to migrate the page.
		 */
		mapping = page_mapping(page);
		if (!mapping)
			goto unlock_both;

		if (mapping->a_ops->migratepage) {
			/*
			 * Most pages have a mapping and most filesystems
			 * should provide a migration function. Anonymous
			 * pages are part of swap space which also has its
			 * own migration function. This is the most common
			 * path for page migration.
			 */
			rc = mapping->a_ops->migratepage(newpage, page);
			goto unlock_both;
		}

		/* Make sure the dirty bit is up to date */
		if (try_to_unmap(page, 1) == SWAP_FAIL) {
			rc = -EPERM;
			goto unlock_both;
		}

		if (page_mapcount(page)) {
			rc = -EAGAIN;
			goto unlock_both;
		}

		/*
		 * Default handling if a filesystem does not provide
		 * a migration function. We can only migrate clean
		 * pages so try to write out any dirty pages first.
		 */
		if (PageDirty(page)) {
			switch (pageout(page, mapping)) {
			case PAGE_KEEP:
			case PAGE_ACTIVATE:
				goto unlock_both;

			case PAGE_SUCCESS:
				unlock_page(newpage);
				goto next;

			case PAGE_CLEAN:
				; /* try to migrate the page below */
			}
		}

		/*
		 * Buffers are managed in a filesystem specific way.
		 * We must have no buffers or drop them.
		 */
		if (!page_has_buffers(page) ||
		    try_to_release_page(page, GFP_KERNEL)) {
			rc = migrate_page(newpage, page);
			goto unlock_both;
		}

		/*
		 * On early passes with mapped pages simply
		 * retry. There may be a lock held for some
		 * buffers that may go away. Later
		 * swap them out.
		 */
		if (pass > 4) {
			/*
			 * Persistently unable to drop buffers... As a
			 * measure of last resort we fall back to
			 * swap_page().
			 */
			unlock_page(newpage);
			newpage = NULL;
			rc = swap_page(page);
			goto next;
		}

unlock_both:
		unlock_page(newpage);

unlock_page:
		unlock_page(page);

next:
		if (rc == -EAGAIN) {
			retry++;
		} else if (rc) {
			/* Permanent failure */
			list_move(&page->lru, failed);
			nr_failed++;
		} else {
			if (newpage) {
				/* Successful migration. Return page to LRU */
				move_to_lru(newpage);
			}
			list_move(&page->lru, moved);
		}
	}
	if (retry && pass++ < 10)
		goto redo;

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return nr_failed + retry;
}

/*
 * Migrate the list 'pagelist' of pages to a certain destination.
 *
 * Specify the destination with either a non-NULL vma or dest >= 0.
 * Return the number of pages not migrated or an error code.
 */
int migrate_pages_to(struct list_head *pagelist,
			struct vm_area_struct *vma, int dest)
{
	LIST_HEAD(newlist);
	LIST_HEAD(moved);
	LIST_HEAD(failed);
	int err = 0;
	unsigned long offset = 0;
	int nr_pages;
	struct page *page;
	struct list_head *p;

redo:
	nr_pages = 0;
	list_for_each(p, pagelist) {
		if (vma) {
			/*
			 * The address passed to alloc_page_vma is used to
			 * generate the proper interleave behavior. We fake
			 * the address here by an increasing offset in order
			 * to get the proper distribution of pages.
			 *
			 * No decision has been made as to which page
			 * a certain old page is moved to so we cannot
			 * specify the correct address.
			 */
			page = alloc_page_vma(GFP_HIGHUSER, vma,
					offset + vma->vm_start);
			offset += PAGE_SIZE;
		}
		else
			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);

		if (!page) {
			err = -ENOMEM;
			goto out;
		}
		list_add_tail(&page->lru, &newlist);
		nr_pages++;
		if (nr_pages > MIGRATE_CHUNK_SIZE)
			break;
	}
	err = migrate_pages(pagelist, &newlist, &moved, &failed);

	putback_lru_pages(&moved);	/* Call release pages instead ?? */

	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
		goto redo;
out:
	/* Return leftover allocated pages */
	while (!list_empty(&newlist)) {
		page = list_entry(newlist.next, struct page, lru);
		list_del(&page->lru);
		__free_page(page);
	}
	list_splice(&failed, pagelist);
	if (err < 0)
		return err;

	/* Calculate number of leftover pages */
	nr_pages = 0;
	list_for_each(p, pagelist)
		nr_pages++;
	return nr_pages;
}
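
/*
 * Putting it all together: a hedged sketch (kept out of the build) of
 * the calling sequence a user of this interface would follow. The
 * function name is hypothetical; the real callers live elsewhere in
 * the VM, e.g. the memory policy code.
 */
#if 0
static int example_move_to_node(struct page *page, int node)
{
	LIST_HEAD(pagelist);
	int err;

	err = migrate_prep();		/* drain LRU pagevecs, check for swap */
	if (err)
		return err;

	err = isolate_lru_page(page, &pagelist);
	if (err)
		return err;

	/*
	 * Allocate destination pages on 'node' and migrate. Pages that
	 * could not be migrated are spliced back onto 'pagelist'.
	 */
	err = migrate_pages_to(&pagelist, NULL, node);

	putback_lru_pages(&pagelist);	/* return any leftovers to the LRU */
	return err;
}
#endif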