Blame - mm/swap.c - kernel/msm-4.9

blob: e9ec06d845e808a36702c6fa64af986ece2f56a1 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/mm/swap.c
				3	*
				4	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				5	*/
				6
				7	/*
				8	* This file contains the default values for the operation of the
				9	* Linux VM subsystem. Fine-tuning documentation can be found in
				10	* Documentation/sysctl/vm.txt.
				11	* Started 18.12.91
				12	* Swap aging added 23.2.95, Stephen Tweedie.
				13	* Buffermem limits added 12.3.98, Rik van Riel.
				14	*/
				15
				16	#include <linux/mm.h>
				17	#include <linux/sched.h>
				18	#include <linux/kernel_stat.h>
				19	#include <linux/swap.h>
				20	#include <linux/mman.h>
				21	#include <linux/pagemap.h>
				22	#include <linux/pagevec.h>
				23	#include <linux/init.h>
				24	#include <linux/module.h>
				25	#include <linux/mm_inline.h>
				26	#include <linux/buffer_head.h> /* for try_to_release_page() */
				27	#include <linux/module.h>
				28	#include <linux/percpu_counter.h>
				29	#include <linux/percpu.h>
				30	#include <linux/cpu.h>
				31	#include <linux/notifier.h>
				32	#include <linux/init.h>
				33
				34	/* How many pages do we try to swap or page in/out together? swap_setup() sets this to 2 when the machine has under 16MB of memory and to 3 otherwise. NOTE(review): presumably a log2 exponent rather than a raw page count - confirm against the users of this variable. */
				35	int page_cluster;
				36
Nick Piggin	8519fb3	2006-02-07 12:58:52 -0800	[diff] [blame]	37	static void put_compound_page(struct page *page)
				38	{
				39	page = (struct page *)page_private(page);
				40	if (put_page_testzero(page)) {
				41	void (dtor)(struct page page);
				42
Hugh Dickins	41d78ba	2006-02-14 13:52:58 -0800	[diff] [blame]	43	dtor = (void ()(struct page ))page[1].lru.next;
Nick Piggin	8519fb3	2006-02-07 12:58:52 -0800	[diff] [blame]	44	(*dtor)(page);
				45	}
				46	}
				47
/*
 * Drop a reference on @page.
 *
 * Compound pages are handed to put_compound_page().  For any other page
 * the count is decremented and, if it reached zero, the page is released
 * through __page_cache_release().
 */
void put_page(struct page *page)
{
	if (unlikely(PageCompound(page))) {
		put_compound_page(page);
		return;
	}

	if (put_page_testzero(page))
		__page_cache_release(page);
}
				55	EXPORT_SYMBOL(put_page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	56
				57	/*
				58	* Writeback is about to end against a page which has been marked for immediate
				59	* reclaim. If it still appears to be reclaimable, move it to the tail of the
				60	* inactive list. The page still has PageWriteback set, which will pin it.
				61	*
				62	* We don't expect many pages to come through here, so don't bother batching
				63	* things up.
				64	*
				65	* To avoid placing the page at the tail of the LRU while PG_writeback is still
				66	* set, this function will clear PG_writeback before performing the page
				67	* motion. Do that inside the lru lock because once PG_writeback is cleared
				68	* we may not touch the page.
				69	*
				70	* Returns zero if it cleared PG_writeback.
				71	*/
				72	int rotate_reclaimable_page(struct page *page)
				73	{
				74	struct zone *zone;
				75	unsigned long flags;
				76
				77	if (PageLocked(page))
				78	return 1;
				79	if (PageDirty(page))
				80	return 1;
				81	if (PageActive(page))
				82	return 1;
				83	if (!PageLRU(page))
				84	return 1;
				85
				86	zone = page_zone(page);
				87	spin_lock_irqsave(&zone->lru_lock, flags);
				88	if (PageLRU(page) && !PageActive(page)) {
				89	list_del(&page->lru);
				90	list_add_tail(&page->lru, &zone->inactive_list);
				91	inc_page_state(pgrotated);
				92	}
				93	if (!test_clear_page_writeback(page))
				94	BUG();
				95	spin_unlock_irqrestore(&zone->lru_lock, flags);
				96	return 0;
				97	}
				98
				99	/*
				100	* FIXME: speed this up?
				101	*/
				102	void fastcall activate_page(struct page *page)
				103	{
				104	struct zone *zone = page_zone(page);
				105
				106	spin_lock_irq(&zone->lru_lock);
				107	if (PageLRU(page) && !PageActive(page)) {
				108	del_page_from_inactive_list(zone, page);
				109	SetPageActive(page);
				110	add_page_to_active_list(zone, page);
				111	inc_page_state(pgactivate);
				112	}
				113	spin_unlock_irq(&zone->lru_lock);
				114	}
				115
				116	/*
				117	* Mark a page as having seen activity.
				118	*
				119	* inactive,unreferenced -> inactive,referenced
				120	* inactive,referenced -> active,unreferenced
				121	* active,unreferenced -> active,referenced
				122	*/
				123	void fastcall mark_page_accessed(struct page *page)
				124	{
				125	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
				126	activate_page(page);
				127	ClearPageReferenced(page);
				128	} else if (!PageReferenced(page)) {
				129	SetPageReferenced(page);
				130	}
				131	}
				132
				133	EXPORT_SYMBOL(mark_page_accessed);
				134
				135	/**
				136	* lru_cache_add: add a page to the page lists
				137	* @page: the page to add
				138	*/
				139	static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
				140	static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
				141
				142	void fastcall lru_cache_add(struct page *page)
				143	{
				144	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				145
				146	page_cache_get(page);
				147	if (!pagevec_add(pvec, page))
				148	__pagevec_lru_add(pvec);
				149	put_cpu_var(lru_add_pvecs);
				150	}
				151
				152	void fastcall lru_cache_add_active(struct page *page)
				153	{
				154	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
				155
				156	page_cache_get(page);
				157	if (!pagevec_add(pvec, page))
				158	__pagevec_lru_add_active(pvec);
				159	put_cpu_var(lru_add_active_pvecs);
				160	}
				161
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	162	static void __lru_add_drain(int cpu)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	163	{
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	164	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	165
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	166	/* CPU is dead, so no locking needed. */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	167	if (pagevec_count(pvec))
				168	__pagevec_lru_add(pvec);
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	169	pvec = &per_cpu(lru_add_active_pvecs, cpu);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	170	if (pagevec_count(pvec))
				171	__pagevec_lru_add_active(pvec);
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	172	}
				173
				174	void lru_add_drain(void)
				175	{
				176	__lru_add_drain(get_cpu());
				177	put_cpu();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	178	}
				179
#ifdef CONFIG_NUMA
/* Work function run on each CPU by lru_add_drain_all(); @dummy is unused. */
static void lru_add_drain_per_cpu(void *dummy)
{
	lru_add_drain();
}
#endif

/*
 * Flush the deferred lru-addition vectors.
 *
 * With CONFIG_NUMA the flush is scheduled on every CPU and the result of
 * schedule_on_each_cpu() is returned; otherwise only the current CPU is
 * drained.
 *
 * Returns 0 for success.
 */
int lru_add_drain_all(void)
{
#ifdef CONFIG_NUMA
	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
#else
	lru_add_drain();
	return 0;
#endif
}
				205
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	206	/*
				207	* This path almost never happens for VM activity - pages are normally
				208	* freed via pagevecs. But it gets used by networking.
				209	*/
				210	void fastcall __page_cache_release(struct page *page)
				211	{
				212	unsigned long flags;
				213	struct zone *zone = page_zone(page);
				214
				215	spin_lock_irqsave(&zone->lru_lock, flags);
				216	if (TestClearPageLRU(page))
				217	del_page_from_lru(zone, page);
				218	if (page_count(page) != 0)
				219	page = NULL;
				220	spin_unlock_irqrestore(&zone->lru_lock, flags);
				221	if (page)
				222	free_hot_page(page);
				223	}
				224
				225	EXPORT_SYMBOL(__page_cache_release);
				226
				227	/*
				228	* Batched page_cache_release(). Decrement the reference count on all the
				229	* passed pages. If it fell to zero then remove the page from the LRU and
				230	* free it.
				231	*
				232	* Avoid taking zone->lru_lock if possible, but if it is taken, retain it
				233	* for the remainder of the operation.
				234	*
				235	* The locking in this function is against shrink_cache(): we recheck the
				236	* page count inside the lock to see whether shrink_cache grabbed the page
				237	* via the LRU. If it did, give up: shrink_cache will free it.
				238	*/
				239	void release_pages(struct page **pages, int nr, int cold)
				240	{
				241	int i;
				242	struct pagevec pages_to_free;
				243	struct zone *zone = NULL;
				244
				245	pagevec_init(&pages_to_free, cold);
				246	for (i = 0; i < nr; i++) {
				247	struct page *page = pages[i];
				248	struct zone *pagezone;
				249
Nick Piggin	8519fb3	2006-02-07 12:58:52 -0800	[diff] [blame]	250	if (unlikely(PageCompound(page))) {
				251	if (zone) {
				252	spin_unlock_irq(&zone->lru_lock);
				253	zone = NULL;
				254	}
				255	put_compound_page(page);
				256	continue;
				257	}
				258
Nick Piggin	b581003	2005-10-29 18:16:12 -0700	[diff] [blame]	259	if (!put_page_testzero(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	260	continue;
				261
				262	pagezone = page_zone(page);
				263	if (pagezone != zone) {
				264	if (zone)
				265	spin_unlock_irq(&zone->lru_lock);
				266	zone = pagezone;
				267	spin_lock_irq(&zone->lru_lock);
				268	}
				269	if (TestClearPageLRU(page))
				270	del_page_from_lru(zone, page);
				271	if (page_count(page) == 0) {
				272	if (!pagevec_add(&pages_to_free, page)) {
				273	spin_unlock_irq(&zone->lru_lock);
				274	__pagevec_free(&pages_to_free);
				275	pagevec_reinit(&pages_to_free);
				276	zone = NULL; /* No lock is held */
				277	}
				278	}
				279	}
				280	if (zone)
				281	spin_unlock_irq(&zone->lru_lock);
				282
				283	pagevec_free(&pages_to_free);
				284	}
				285
				286	/*
				287	* The pages which we're about to release may be in the deferred lru-addition
				288	* queues. That would prevent them from really being freed right now. That's
				289	* OK from a correctness point of view but is inefficient - those pages may be
				290	* cache-warm and we want to give them back to the page allocator ASAP.
				291	*
				292	* So __pagevec_release() will drain those queues here. __pagevec_lru_add()
				293	* and __pagevec_lru_add_active() call release_pages() directly to avoid
				294	* mutual recursion.
				295	*/
				296	void __pagevec_release(struct pagevec *pvec)
				297	{
				298	lru_add_drain();
				299	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
				300	pagevec_reinit(pvec);
				301	}
				302
Steve French	7f28570	2005-11-01 10:22:55 -0800	[diff] [blame]	303	EXPORT_SYMBOL(__pagevec_release);
				304
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	305	/*
				306	* pagevec_release() for pages which are known to not be on the LRU
				307	*
				308	* This function reinitialises the caller's pagevec.
				309	*/
				310	void __pagevec_release_nonlru(struct pagevec *pvec)
				311	{
				312	int i;
				313	struct pagevec pages_to_free;
				314
				315	pagevec_init(&pages_to_free, pvec->cold);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	316	for (i = 0; i < pagevec_count(pvec); i++) {
				317	struct page *page = pvec->pages[i];
				318
				319	BUG_ON(PageLRU(page));
				320	if (put_page_testzero(page))
				321	pagevec_add(&pages_to_free, page);
				322	}
				323	pagevec_free(&pages_to_free);
				324	pagevec_reinit(pvec);
				325	}
				326
				327	/*
				328	* Add the passed pages to the LRU, then drop the caller's refcount
				329	* on them. Reinitialises the caller's pagevec.
				330	*/
				331	void __pagevec_lru_add(struct pagevec *pvec)
				332	{
				333	int i;
				334	struct zone *zone = NULL;
				335
				336	for (i = 0; i < pagevec_count(pvec); i++) {
				337	struct page *page = pvec->pages[i];
				338	struct zone *pagezone = page_zone(page);
				339
				340	if (pagezone != zone) {
				341	if (zone)
				342	spin_unlock_irq(&zone->lru_lock);
				343	zone = pagezone;
				344	spin_lock_irq(&zone->lru_lock);
				345	}
				346	if (TestSetPageLRU(page))
				347	BUG();
				348	add_page_to_inactive_list(zone, page);
				349	}
				350	if (zone)
				351	spin_unlock_irq(&zone->lru_lock);
				352	release_pages(pvec->pages, pvec->nr, pvec->cold);
				353	pagevec_reinit(pvec);
				354	}
				355
				356	EXPORT_SYMBOL(__pagevec_lru_add);
				357
				358	void __pagevec_lru_add_active(struct pagevec *pvec)
				359	{
				360	int i;
				361	struct zone *zone = NULL;
				362
				363	for (i = 0; i < pagevec_count(pvec); i++) {
				364	struct page *page = pvec->pages[i];
				365	struct zone *pagezone = page_zone(page);
				366
				367	if (pagezone != zone) {
				368	if (zone)
				369	spin_unlock_irq(&zone->lru_lock);
				370	zone = pagezone;
				371	spin_lock_irq(&zone->lru_lock);
				372	}
				373	if (TestSetPageLRU(page))
				374	BUG();
				375	if (TestSetPageActive(page))
				376	BUG();
				377	add_page_to_active_list(zone, page);
				378	}
				379	if (zone)
				380	spin_unlock_irq(&zone->lru_lock);
				381	release_pages(pvec->pages, pvec->nr, pvec->cold);
				382	pagevec_reinit(pvec);
				383	}
				384
				385	/*
				386	* Try to drop buffers from the pages in a pagevec
				387	*/
				388	void pagevec_strip(struct pagevec *pvec)
				389	{
				390	int i;
				391
				392	for (i = 0; i < pagevec_count(pvec); i++) {
				393	struct page *page = pvec->pages[i];
				394
				395	if (PagePrivate(page) && !TestSetPageLocked(page)) {
				396	try_to_release_page(page, 0);
				397	unlock_page(page);
				398	}
				399	}
				400	}
				401
				402	/**
				403	* pagevec_lookup - gang pagecache lookup
				404	* @pvec: Where the resulting pages are placed
				405	* @mapping: The address_space to search
				406	* @start: The starting page index
				407	* @nr_pages: The maximum number of pages
				408	*
				409	* pagevec_lookup() will search for and return a group of up to @nr_pages pages
				410	* in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a
				411	* reference against the pages in @pvec.
				412	*
				413	* The search returns a group of mapping-contiguous pages with ascending
				414	* indexes. There may be holes in the indices due to not-present pages.
				415	*
				416	* pagevec_lookup() returns the number of pages which were found.
				417	*/
				418	unsigned pagevec_lookup(struct pagevec pvec, struct address_space mapping,
				419	pgoff_t start, unsigned nr_pages)
				420	{
				421	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
				422	return pagevec_count(pvec);
				423	}
				424
Christoph Hellwig	78539fd	2006-01-11 20:47:41 +1100	[diff] [blame]	425	EXPORT_SYMBOL(pagevec_lookup);
				426
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	427	unsigned pagevec_lookup_tag(struct pagevec pvec, struct address_space mapping,
				428	pgoff_t *index, int tag, unsigned nr_pages)
				429	{
				430	pvec->nr = find_get_pages_tag(mapping, index, tag,
				431	nr_pages, pvec->pages);
				432	return pagevec_count(pvec);
				433	}
				434
Steve French	7f28570	2005-11-01 10:22:55 -0800	[diff] [blame]	435	EXPORT_SYMBOL(pagevec_lookup_tag);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	436
				437	#ifdef CONFIG_SMP
				438	/*
				439	* We tolerate a little inaccuracy to avoid ping-ponging the counter between
				440	* CPUs
				441	*/
				442	#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
				443
				444	static DEFINE_PER_CPU(long, committed_space) = 0;
				445
				446	void vm_acct_memory(long pages)
				447	{
				448	long *local;
				449
				450	preempt_disable();
				451	local = &__get_cpu_var(committed_space);
				452	*local += pages;
				453	if (local > ACCT_THRESHOLD \|\| local < -ACCT_THRESHOLD) {
				454	atomic_add(*local, &vm_committed_space);
				455	*local = 0;
				456	}
				457	preempt_enable();
				458	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	459
				460	#ifdef CONFIG_HOTPLUG_CPU
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	461
				462	/* Drop the CPU's cached committed space back into the central pool. */
				463	static int cpu_swap_callback(struct notifier_block *nfb,
				464	unsigned long action,
				465	void *hcpu)
				466	{
				467	long *committed;
				468
				469	committed = &per_cpu(committed_space, (long)hcpu);
				470	if (action == CPU_DEAD) {
				471	atomic_add(*committed, &vm_committed_space);
				472	*committed = 0;
Andrew Morton	80bfed9	2006-01-06 00:11:14 -0800	[diff] [blame]	473	__lru_add_drain((long)hcpu);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	474	}
				475	return NOTIFY_OK;
				476	}
				477	#endif /* CONFIG_HOTPLUG_CPU */
				478	#endif /* CONFIG_SMP */
				479
				480	#ifdef CONFIG_SMP
				481	void percpu_counter_mod(struct percpu_counter *fbc, long amount)
				482	{
				483	long count;
				484	long *pcount;
				485	int cpu = get_cpu();
				486
				487	pcount = per_cpu_ptr(fbc->counters, cpu);
				488	count = *pcount + amount;
				489	if (count >= FBC_BATCH \|\| count <= -FBC_BATCH) {
				490	spin_lock(&fbc->lock);
				491	fbc->count += count;
Andrew Morton	e2bab3d	2006-03-07 21:55:31 -0800	[diff] [blame]	492	*pcount = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	493	spin_unlock(&fbc->lock);
Andrew Morton	e2bab3d	2006-03-07 21:55:31 -0800	[diff] [blame]	494	} else {
				495	*pcount = count;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	496	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	497	put_cpu();
				498	}
				499	EXPORT_SYMBOL(percpu_counter_mod);
Andrew Morton	e2bab3d	2006-03-07 21:55:31 -0800	[diff] [blame]	500
				501	/*
				502	* Add up all the per-cpu counts, return the result. This is a more accurate
				503	* but much slower version of percpu_counter_read_positive()
				504	*/
				505	long percpu_counter_sum(struct percpu_counter *fbc)
				506	{
				507	long ret;
				508	int cpu;
				509
				510	spin_lock(&fbc->lock);
				511	ret = fbc->count;
				512	for_each_cpu(cpu) {
				513	long *pcount = per_cpu_ptr(fbc->counters, cpu);
				514	ret += *pcount;
				515	}
				516	spin_unlock(&fbc->lock);
				517	return ret < 0 ? 0 : ret;
				518	}
				519	EXPORT_SYMBOL(percpu_counter_sum);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	520	#endif
				521
				522	/*
				523	* Perform any setup for the swap system
				524	*/
				525	void __init swap_setup(void)
				526	{
				527	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
				528
				529	/* Use a smaller cluster for small-memory machines */
				530	if (megs < 16)
				531	page_cluster = 2;
				532	else
				533	page_cluster = 3;
				534	/*
				535	* Right now other parts of the system means that we
				536	* _really_ don't want to cluster much more
				537	*/
				538	hotcpu_notifier(cpu_swap_callback, 0);
				539	}