/*
 * linux/mm/compaction.c
 *
 * Memory compaction for the reduction of external fragmentation. Note that
 * this heavily depends upon page migration to do all the real heavy
 * lifting
 *
 * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
 */
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include "internal.h"

/*
 * compact_control is used to track pages being migrated and the free pages
 * they are being migrated to during memory compaction. The free_pfn starts
 * at the end of a zone and migrate_pfn begins at the start. Movable pages
 * are moved to the end of a zone during a compaction run and the run
 * completes when free_pfn <= migrate_pfn
 */
struct compact_control {
	struct list_head freepages;	/* List of free pages to migrate to */
	struct list_head migratepages;	/* List of pages being migrated */
	unsigned long nr_freepages;	/* Number of isolated free pages */
	unsigned long nr_migratepages;	/* Number of pages to migrate */
	unsigned long free_pfn;		/* isolate_freepages search base */
	unsigned long migrate_pfn;	/* isolate_migratepages search base */

	/* Account for isolated anon and file pages */
	unsigned long nr_anon;
	unsigned long nr_file;

	struct zone *zone;
};

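/* Free every page on @freelist back to the allocator and return the count */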
static unsigned long release_freepages(struct list_head *freelist)
{
	struct page *page, *next;
	unsigned long count = 0;

	list_for_each_entry_safe(page, next, freelist, lru) {
		list_del(&page->lru);
		__free_page(page);
		count++;
	}

	return count;
}

/* Isolate free pages onto a private freelist. Must hold zone->lock */
static unsigned long isolate_freepages_block(struct zone *zone,
				unsigned long blockpfn,
				struct list_head *freelist)
{
	unsigned long zone_end_pfn, end_pfn;
	int total_isolated = 0;
	struct page *cursor;

	/* Get the last PFN we should scan for free pages */
	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
	end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);

	/* Find the first usable PFN in the block to initialise the page cursor */
	for (; blockpfn < end_pfn; blockpfn++) {
		if (pfn_valid_within(blockpfn))
			break;
	}
	cursor = pfn_to_page(blockpfn);

	/* Isolate free pages. This assumes the block is valid */
	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
		int isolated, i;
		struct page *page = cursor;

		if (!pfn_valid_within(blockpfn))
			continue;

		if (!PageBuddy(page))
			continue;

		/* Found a free page, break it into order-0 pages */
		isolated = split_free_page(page);
		total_isolated += isolated;
		for (i = 0; i < isolated; i++) {
			list_add(&page->lru, freelist);
			page++;
		}

		/* If a page was split, advance to the end of it */
		if (isolated) {
			blockpfn += isolated - 1;
			cursor += isolated - 1;
		}
	}

	return total_isolated;
}

/* Returns true if the page is within a block suitable as a migration target */
static bool suitable_migration_target(struct page *page)
{
	int migratetype = get_pageblock_migratetype(page);

	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
		return false;

	/* If the page is a large free page, then allow migration */
	if (PageBuddy(page) && page_order(page) >= pageblock_order)
		return true;

	/* If the block is MIGRATE_MOVABLE, allow migration */
	if (migratetype == MIGRATE_MOVABLE)
		return true;

	/* Otherwise skip the block */
	return false;
}

/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
static void isolate_freepages(struct zone *zone,
				struct compact_control *cc)
{
	struct page *page;
	unsigned long high_pfn, low_pfn, pfn;
	unsigned long flags;
	int nr_freepages = cc->nr_freepages;
	struct list_head *freelist = &cc->freepages;

	pfn = cc->free_pfn;
	low_pfn = cc->migrate_pfn + pageblock_nr_pages;
	high_pfn = low_pfn;

	/*
	 * Isolate free pages until enough are available to migrate the
	 * pages on cc->migratepages. We stop searching if the migrate
	 * and free page scanners meet or enough free pages are isolated.
	 */
	spin_lock_irqsave(&zone->lock, flags);
	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
					pfn -= pageblock_nr_pages) {
		unsigned long isolated;

		if (!pfn_valid(pfn))
			continue;

		/*
		 * Check for overlapping nodes/zones. It's possible on some
		 * configurations to have a setup like
		 * node0 node1 node0
		 * i.e. it's possible that all pages within a zone's range
		 * of pages do not belong to a single zone.
		 */
		page = pfn_to_page(pfn);
		if (page_zone(page) != zone)
			continue;

		/* Check the block is suitable for migration */
		if (!suitable_migration_target(page))
			continue;

		/* Found a block suitable for isolating free pages from */
		isolated = isolate_freepages_block(zone, pfn, freelist);
		nr_freepages += isolated;

		/*
		 * Record the highest PFN we isolated pages from. When next
		 * looking for free pages, the search will restart here as
		 * page migration may have returned some pages to the allocator.
		 */
		if (isolated)
			high_pfn = max(high_pfn, pfn);
	}
	spin_unlock_irqrestore(&zone->lock, flags);

	/* split_free_page does not map the pages */
	list_for_each_entry(page, freelist, lru) {
		arch_alloc_page(page, 0);
		kernel_map_pages(page, 1, 1);
	}

	cc->free_pfn = high_pfn;
	cc->nr_freepages = nr_freepages;
}

/* Update the number of anon and file isolated pages in the zone */
static void acct_isolated(struct zone *zone, struct compact_control *cc)
{
	struct page *page;
	unsigned int count[NR_LRU_LISTS] = { 0, };

	list_for_each_entry(page, &cc->migratepages, lru) {
		int lru = page_lru_base_type(page);
		count[lru]++;
	}

	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
}

/* Similar to reclaim, but different enough that they don't share logic */
static bool too_many_isolated(struct zone *zone)
{
	unsigned long inactive, isolated;

	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
					zone_page_state(zone, NR_INACTIVE_ANON);
	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
					zone_page_state(zone, NR_ISOLATED_ANON);

	return isolated > inactive;
}

/*
 * Isolate all pages that can be migrated from the block pointed to by
 * the migrate scanner within compact_control.
 */
static unsigned long isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long low_pfn, end_pfn;
	struct list_head *migratelist = &cc->migratepages;

	/* Do not scan outside zone boundaries */
	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);

	/* Only scan within a pageblock boundary */
	end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);

	/* Do not cross the free scanner or scan within a memory hole */
	if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
		cc->migrate_pfn = end_pfn;
		return 0;
	}

	/*
	 * Ensure that there are not too many pages isolated from the LRU
	 * list by either parallel reclaimers or compaction. If there are,
	 * delay for some time until fewer pages are isolated.
	 */
	while (unlikely(too_many_isolated(zone))) {
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		if (fatal_signal_pending(current))
			return 0;
	}

	/* Time to isolate some pages for migration */
	spin_lock_irq(&zone->lru_lock);
	for (; low_pfn < end_pfn; low_pfn++) {
		struct page *page;

		if (!pfn_valid_within(low_pfn))
			continue;

		/* Get the page and skip it if free */
		page = pfn_to_page(low_pfn);
		if (PageBuddy(page))
			continue;

		/* Try to isolate the page */
		if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
			continue;

		/* Successfully isolated */
		del_page_from_lru_list(zone, page, page_lru(page));
		list_add(&page->lru, migratelist);
		mem_cgroup_del_lru(page);
		cc->nr_migratepages++;

		/* Avoid isolating too much */
		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
			break;
	}

	acct_isolated(zone, cc);

	spin_unlock_irq(&zone->lru_lock);
	cc->migrate_pfn = low_pfn;

	return cc->nr_migratepages;
}

/*
 * This is a migrate-callback that "allocates" freepages by taking pages
 * from the isolated freelists in the block we are migrating to.
 */
static struct page *compaction_alloc(struct page *migratepage,
					unsigned long data,
					int **result)
{
	struct compact_control *cc = (struct compact_control *)data;
	struct page *freepage;

	/* Isolate free pages if necessary */
	if (list_empty(&cc->freepages)) {
		isolate_freepages(cc->zone, cc);

		if (list_empty(&cc->freepages))
			return NULL;
	}

	freepage = list_entry(cc->freepages.next, struct page, lru);
	list_del(&freepage->lru);
	cc->nr_freepages--;

	return freepage;
}

/*
 * We cannot control nr_migratepages and nr_freepages fully when migration is
 * running as migrate_pages() has no knowledge of compact_control. When
 * migration is complete, we count the number of pages on the lists by hand.
 */
static void update_nr_listpages(struct compact_control *cc)
{
	int nr_migratepages = 0;
	int nr_freepages = 0;
	struct page *page;

	list_for_each_entry(page, &cc->migratepages, lru)
		nr_migratepages++;
	list_for_each_entry(page, &cc->freepages, lru)
		nr_freepages++;

	cc->nr_migratepages = nr_migratepages;
	cc->nr_freepages = nr_freepages;
}

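/*
 * Decide whether this compaction run is done. The run ends early with
 * COMPACT_PARTIAL on a fatal signal, or with COMPACT_COMPLETE once the
 * migrate and free scanners have met.
 */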
static int compact_finished(struct zone *zone,
						struct compact_control *cc)
{
	if (fatal_signal_pending(current))
		return COMPACT_PARTIAL;

	/* Compaction run completes if the migrate and free scanner meet */
	if (cc->free_pfn <= cc->migrate_pfn)
		return COMPACT_COMPLETE;

	return COMPACT_CONTINUE;
}

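/*
 * Compact a single zone: start the migrate scanner at the bottom of the
 * zone and the free scanner at the top, then repeatedly isolate, migrate
 * and account pages until compact_finished() signals completion.
 */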
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
	int ret;

	/* Setup to move all movable pages to the end of the zone */
	cc->migrate_pfn = zone->zone_start_pfn;
	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
	cc->free_pfn &= ~(pageblock_nr_pages-1);	/* round down to a pageblock */

	migrate_prep_local();	/* drain the local CPU's LRU pagevecs */

	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
		unsigned long nr_migrate, nr_remaining;

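		/*
		 * If no pages were isolated, the migrate scanner has either
		 * advanced past the block or a fatal signal is pending, in
		 * which case compact_finished() will end the run; either
		 * way the loop makes progress.
		 */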
		if (!isolate_migratepages(zone, cc))
			continue;

		nr_migrate = cc->nr_migratepages;
		migrate_pages(&cc->migratepages, compaction_alloc,
						(unsigned long)cc, 0);
		update_nr_listpages(cc);
		nr_remaining = cc->nr_migratepages;

		count_vm_event(COMPACTBLOCKS);
		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
		if (nr_remaining)
			count_vm_events(COMPACTPAGEFAILED, nr_remaining);

		/* Release LRU pages not migrated */
		if (!list_empty(&cc->migratepages)) {
			putback_lru_pages(&cc->migratepages);
			cc->nr_migratepages = 0;
		}
	}

	/* Release free pages and check accounting */
	cc->nr_freepages -= release_freepages(&cc->freepages);
	VM_BUG_ON(cc->nr_freepages != 0);

	return ret;
}

/* Compact all zones within a node */
static int compact_node(int nid)
{
	int zoneid;
	pg_data_t *pgdat;
	struct zone *zone;

	if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
		return -EINVAL;
	pgdat = NODE_DATA(nid);

	/* Flush pending updates to the LRU lists */
	lru_add_drain_all();

	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
		struct compact_control cc = {
			.nr_freepages = 0,
			.nr_migratepages = 0,
		};

		zone = &pgdat->node_zones[zoneid];
		if (!populated_zone(zone))
			continue;

		cc.zone = zone;
		INIT_LIST_HEAD(&cc.freepages);
		INIT_LIST_HEAD(&cc.migratepages);

		compact_zone(zone, &cc);

		VM_BUG_ON(!list_empty(&cc.freepages));
		VM_BUG_ON(!list_empty(&cc.migratepages));
	}

	return 0;
}

/* Compact all nodes in the system */
static int compact_nodes(void)
{
	int nid;

	for_each_online_node(nid)
		compact_node(nid);

	return COMPACT_COMPLETE;
}

/* The written value is actually unused; writing anything compacts all memory */
int sysctl_compact_memory;

/* This is the entry point for compacting all nodes via /proc/sys/vm */
int sysctl_compaction_handler(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	if (write)
		return compact_nodes();

	return 0;
}

#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
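/*
 * Per-node sysfs trigger: writing to /sys/devices/system/node/nodeN/compact
 * compacts every populated zone of that node.
 */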
ssize_t sysfs_compact_node(struct sys_device *dev,
			struct sysdev_attribute *attr,
			const char *buf, size_t count)
{
	compact_node(dev->id);

	return count;
}
static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);

int compaction_register_node(struct node *node)
{
	return sysdev_create_file(&node->sysdev, &attr_compact);
}

void compaction_unregister_node(struct node *node)
{
	return sysdev_remove_file(&node->sysdev, &attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */