Blame - mm/swap.c - kernel/msm-4.9

blob: d09cf7f03e767e57e500ac5f6dd4c35c72c5f9a4 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/mm/swap.c
				3	*
				4	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				5	*/
				6
				7	/*
				8	* This file contains the default values for the operation of the
				9	* Linux VM subsystem. Fine-tuning documentation can be found in
				10	* Documentation/sysctl/vm.txt.
				11	* Started 18.12.91
				12	* Swap aging added 23.2.95, Stephen Tweedie.
				13	* Buffermem limits added 12.3.98, Rik van Riel.
				14	*/
				15
				16	#include <linux/mm.h>
				17	#include <linux/sched.h>
				18	#include <linux/kernel_stat.h>
				19	#include <linux/swap.h>
				20	#include <linux/mman.h>
				21	#include <linux/pagemap.h>
				22	#include <linux/pagevec.h>
				23	#include <linux/init.h>
				24	#include <linux/module.h>
				25	#include <linux/mm_inline.h>
				26	#include <linux/buffer_head.h> /* for try_to_release_page() */
				27	#include <linux/module.h>
				28	#include <linux/percpu_counter.h>
				29	#include <linux/percpu.h>
				30	#include <linux/cpu.h>
				31	#include <linux/notifier.h>
				32	#include <linux/init.h>
				33
				34	/*
				 * How many pages do we try to swap or page in/out together?
				 *
				 * swap_setup() in this file sets this to 2 on machines with less than
				 * 16MB of memory and to 3 otherwise.
				 * NOTE(review): presumably a log2 page count rather than a raw count --
				 * confirm against the users of this variable.
				 */
				35	int page_cluster;
				36
				37	#ifdef CONFIG_HUGETLB_PAGE
				38
				39	void put_page(struct page *page)
				40	{
				41	if (unlikely(PageCompound(page))) {
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	42	page = (struct page *)page_private(page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43	if (put_page_testzero(page)) {
				44	void (dtor)(struct page page);
				45
				46	dtor = (void ()(struct page ))page[1].mapping;
				47	(*dtor)(page);
				48	}
				49	return;
				50	}
Nick Piggin	b581003	2005-10-29 18:16:12 -0700	[diff] [blame]	51	if (put_page_testzero(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	__page_cache_release(page);
				53	}
				54	EXPORT_SYMBOL(put_page);
				55	#endif
				56
				57	/*
				58	* Writeback is about to end against a page which has been marked for immediate
				59	* reclaim. If it still appears to be reclaimable, move it to the tail of the
				60	* inactive list. The page still has PageWriteback set, which will pin it.
				61	*
				62	* We don't expect many pages to come through here, so don't bother batching
				63	* things up.
				64	*
				65	* To avoid placing the page at the tail of the LRU while PG_writeback is still
				66	* set, this function will clear PG_writeback before performing the page
				67	* motion. Do that inside the lru lock because once PG_writeback is cleared
				68	* we may not touch the page.
				69	*
				70	* Returns zero if it cleared PG_writeback.
				71	*/
				72	int rotate_reclaimable_page(struct page *page)
				73	{
				74	struct zone *zone;
				75	unsigned long flags;
				76
				77	if (PageLocked(page))
				78	return 1;
				79	if (PageDirty(page))
				80	return 1;
				81	if (PageActive(page))
				82	return 1;
				83	if (!PageLRU(page))
				84	return 1;
				85
				86	zone = page_zone(page);
				87	spin_lock_irqsave(&zone->lru_lock, flags);
				88	if (PageLRU(page) && !PageActive(page)) {
				89	list_del(&page->lru);
				90	list_add_tail(&page->lru, &zone->inactive_list);
				91	inc_page_state(pgrotated);
				92	}
				93	if (!test_clear_page_writeback(page))
				94	BUG();
				95	spin_unlock_irqrestore(&zone->lru_lock, flags);
				96	return 0;
				97	}
				98
				99	/*
				100	* FIXME: speed this up?
				101	*/
				102	void fastcall activate_page(struct page *page)
				103	{
				104	struct zone *zone = page_zone(page);
				105
				106	spin_lock_irq(&zone->lru_lock);
				107	if (PageLRU(page) && !PageActive(page)) {
				108	del_page_from_inactive_list(zone, page);
				109	SetPageActive(page);
				110	add_page_to_active_list(zone, page);
				111	inc_page_state(pgactivate);
				112	}
				113	spin_unlock_irq(&zone->lru_lock);
				114	}
				115
				116	/*
				117	* Mark a page as having seen activity.
				118	*
				119	* inactive,unreferenced -> inactive,referenced
				120	* inactive,referenced -> active,unreferenced
				121	* active,unreferenced -> active,referenced
				122	*/
				123	void fastcall mark_page_accessed(struct page *page)
				124	{
				125	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
				126	activate_page(page);
				127	ClearPageReferenced(page);
				128	} else if (!PageReferenced(page)) {
				129	SetPageReferenced(page);
				130	}
				131	}
				132
				133	EXPORT_SYMBOL(mark_page_accessed);
				134
				135	/**
				136	* lru_cache_add: add a page to the page lists
				137	* @page: the page to add
				138	*/
				139	static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
				140	static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
				141
				142	void fastcall lru_cache_add(struct page *page)
				143	{
				144	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				145
				146	page_cache_get(page);
				147	if (!pagevec_add(pvec, page))
				148	__pagevec_lru_add(pvec);
				149	put_cpu_var(lru_add_pvecs);
				150	}
				151
				152	void fastcall lru_cache_add_active(struct page *page)
				153	{
				154	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
				155
				156	page_cache_get(page);
				157	if (!pagevec_add(pvec, page))
				158	__pagevec_lru_add_active(pvec);
				159	put_cpu_var(lru_add_active_pvecs);
				160	}
				161
				162	void lru_add_drain(void)
				163	{
				164	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				165
				166	if (pagevec_count(pvec))
				167	__pagevec_lru_add(pvec);
				168	pvec = &__get_cpu_var(lru_add_active_pvecs);
				169	if (pagevec_count(pvec))
				170	__pagevec_lru_add_active(pvec);
				171	put_cpu_var(lru_add_pvecs);
				172	}
				173
				174	/*
				175	* This path almost never happens for VM activity - pages are normally
				176	* freed via pagevecs. But it gets used by networking.
				177	*/
				178	void fastcall __page_cache_release(struct page *page)
				179	{
				180	unsigned long flags;
				181	struct zone *zone = page_zone(page);
				182
				183	spin_lock_irqsave(&zone->lru_lock, flags);
				184	if (TestClearPageLRU(page))
				185	del_page_from_lru(zone, page);
				186	if (page_count(page) != 0)
				187	page = NULL;
				188	spin_unlock_irqrestore(&zone->lru_lock, flags);
				189	if (page)
				190	free_hot_page(page);
				191	}
				192
				193	EXPORT_SYMBOL(__page_cache_release);
				194
				195	/*
				196	* Batched page_cache_release(). Decrement the reference count on all the
				197	* passed pages. If it fell to zero then remove the page from the LRU and
				198	* free it.
				199	*
				200	* Avoid taking zone->lru_lock if possible, but if it is taken, retain it
				201	* for the remainder of the operation.
				202	*
				203	* The locking in this function is against shrink_cache(): we recheck the
				204	* page count inside the lock to see whether shrink_cache grabbed the page
				205	* via the LRU. If it did, give up: shrink_cache will free it.
				206	*/
				207	void release_pages(struct page **pages, int nr, int cold)
				208	{
				209	int i;
				210	struct pagevec pages_to_free;
				211	struct zone *zone = NULL;
				212
				213	pagevec_init(&pages_to_free, cold);
				214	for (i = 0; i < nr; i++) {
				215	struct page *page = pages[i];
				216	struct zone *pagezone;
				217
Nick Piggin	b581003	2005-10-29 18:16:12 -0700	[diff] [blame]	218	if (!put_page_testzero(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	219	continue;
				220
				221	pagezone = page_zone(page);
				222	if (pagezone != zone) {
				223	if (zone)
				224	spin_unlock_irq(&zone->lru_lock);
				225	zone = pagezone;
				226	spin_lock_irq(&zone->lru_lock);
				227	}
				228	if (TestClearPageLRU(page))
				229	del_page_from_lru(zone, page);
				230	if (page_count(page) == 0) {
				231	if (!pagevec_add(&pages_to_free, page)) {
				232	spin_unlock_irq(&zone->lru_lock);
				233	__pagevec_free(&pages_to_free);
				234	pagevec_reinit(&pages_to_free);
				235	zone = NULL; /* No lock is held */
				236	}
				237	}
				238	}
				239	if (zone)
				240	spin_unlock_irq(&zone->lru_lock);
				241
				242	pagevec_free(&pages_to_free);
				243	}
				244
				245	/*
				246	* The pages which we're about to release may be in the deferred lru-addition
				247	* queues. That would prevent them from really being freed right now. That's
				248	* OK from a correctness point of view but is inefficient - those pages may be
				249	* cache-warm and we want to give them back to the page allocator ASAP.
				250	*
				251	* So __pagevec_release() will drain those queues here. __pagevec_lru_add()
				252	* and __pagevec_lru_add_active() call release_pages() directly to avoid
				253	* mutual recursion.
				254	*/
				255	void __pagevec_release(struct pagevec *pvec)
				256	{
				257	lru_add_drain();
				258	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
				259	pagevec_reinit(pvec);
				260	}
				261
Steve French	7f28570	2005-11-01 10:22:55 -0800	[diff] [blame]	262	EXPORT_SYMBOL(__pagevec_release);
				263
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	264	/*
				265	* pagevec_release() for pages which are known to not be on the LRU
				266	*
				267	* This function reinitialises the caller's pagevec.
				268	*/
				269	void __pagevec_release_nonlru(struct pagevec *pvec)
				270	{
				271	int i;
				272	struct pagevec pages_to_free;
				273
				274	pagevec_init(&pages_to_free, pvec->cold);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	275	for (i = 0; i < pagevec_count(pvec); i++) {
				276	struct page *page = pvec->pages[i];
				277
				278	BUG_ON(PageLRU(page));
				279	if (put_page_testzero(page))
				280	pagevec_add(&pages_to_free, page);
				281	}
				282	pagevec_free(&pages_to_free);
				283	pagevec_reinit(pvec);
				284	}
				285
				286	/*
				287	* Add the passed pages to the LRU, then drop the caller's refcount
				288	* on them. Reinitialises the caller's pagevec.
				289	*/
				290	void __pagevec_lru_add(struct pagevec *pvec)
				291	{
				292	int i;
				293	struct zone *zone = NULL;
				294
				295	for (i = 0; i < pagevec_count(pvec); i++) {
				296	struct page *page = pvec->pages[i];
				297	struct zone *pagezone = page_zone(page);
				298
				299	if (pagezone != zone) {
				300	if (zone)
				301	spin_unlock_irq(&zone->lru_lock);
				302	zone = pagezone;
				303	spin_lock_irq(&zone->lru_lock);
				304	}
				305	if (TestSetPageLRU(page))
				306	BUG();
				307	add_page_to_inactive_list(zone, page);
				308	}
				309	if (zone)
				310	spin_unlock_irq(&zone->lru_lock);
				311	release_pages(pvec->pages, pvec->nr, pvec->cold);
				312	pagevec_reinit(pvec);
				313	}
				314
				315	EXPORT_SYMBOL(__pagevec_lru_add);
				316
				317	void __pagevec_lru_add_active(struct pagevec *pvec)
				318	{
				319	int i;
				320	struct zone *zone = NULL;
				321
				322	for (i = 0; i < pagevec_count(pvec); i++) {
				323	struct page *page = pvec->pages[i];
				324	struct zone *pagezone = page_zone(page);
				325
				326	if (pagezone != zone) {
				327	if (zone)
				328	spin_unlock_irq(&zone->lru_lock);
				329	zone = pagezone;
				330	spin_lock_irq(&zone->lru_lock);
				331	}
				332	if (TestSetPageLRU(page))
				333	BUG();
				334	if (TestSetPageActive(page))
				335	BUG();
				336	add_page_to_active_list(zone, page);
				337	}
				338	if (zone)
				339	spin_unlock_irq(&zone->lru_lock);
				340	release_pages(pvec->pages, pvec->nr, pvec->cold);
				341	pagevec_reinit(pvec);
				342	}
				343
				344	/*
				345	* Try to drop buffers from the pages in a pagevec
				346	*/
				347	void pagevec_strip(struct pagevec *pvec)
				348	{
				349	int i;
				350
				351	for (i = 0; i < pagevec_count(pvec); i++) {
				352	struct page *page = pvec->pages[i];
				353
				354	if (PagePrivate(page) && !TestSetPageLocked(page)) {
				355	try_to_release_page(page, 0);
				356	unlock_page(page);
				357	}
				358	}
				359	}
				360
				361	/**
				362	* pagevec_lookup - gang pagecache lookup
				363	* @pvec: Where the resulting pages are placed
				364	* @mapping: The address_space to search
				365	* @start: The starting page index
				366	* @nr_pages: The maximum number of pages
				367	*
				368	* pagevec_lookup() will search for and return a group of up to @nr_pages pages
				369	* in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a
				370	* reference against the pages in @pvec.
				371	*
				372	* The search returns a group of mapping-contiguous pages with ascending
				373	* indexes. There may be holes in the indices due to not-present pages.
				374	*
				375	* pagevec_lookup() returns the number of pages which were found.
				376	*/
				377	unsigned pagevec_lookup(struct pagevec pvec, struct address_space mapping,
				378	pgoff_t start, unsigned nr_pages)
				379	{
				380	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
				381	return pagevec_count(pvec);
				382	}
				383
				384	unsigned pagevec_lookup_tag(struct pagevec pvec, struct address_space mapping,
				385	pgoff_t *index, int tag, unsigned nr_pages)
				386	{
				387	pvec->nr = find_get_pages_tag(mapping, index, tag,
				388	nr_pages, pvec->pages);
				389	return pagevec_count(pvec);
				390	}
				391
Steve French	7f28570	2005-11-01 10:22:55 -0800	[diff] [blame]	392	EXPORT_SYMBOL(pagevec_lookup_tag);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	393
				394	#ifdef CONFIG_SMP
				395	/*
				396	* We tolerate a little inaccuracy to avoid ping-ponging the counter between
				397	* CPUs
				398	*/
				399	#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
				400
				401	static DEFINE_PER_CPU(long, committed_space) = 0;
				402
				403	void vm_acct_memory(long pages)
				404	{
				405	long *local;
				406
				407	preempt_disable();
				408	local = &__get_cpu_var(committed_space);
				409	*local += pages;
				410	if (local > ACCT_THRESHOLD \|\| local < -ACCT_THRESHOLD) {
				411	atomic_add(*local, &vm_committed_space);
				412	*local = 0;
				413	}
				414	preempt_enable();
				415	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	416
				417	#ifdef CONFIG_HOTPLUG_CPU
				418	static void lru_drain_cache(unsigned int cpu)
				419	{
				420	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
				421
				422	/* CPU is dead, so no locking needed. */
				423	if (pagevec_count(pvec))
				424	__pagevec_lru_add(pvec);
				425	pvec = &per_cpu(lru_add_active_pvecs, cpu);
				426	if (pagevec_count(pvec))
				427	__pagevec_lru_add_active(pvec);
				428	}
				429
				430	/* Drop the CPU's cached committed space back into the central pool. */
				431	static int cpu_swap_callback(struct notifier_block *nfb,
				432	unsigned long action,
				433	void *hcpu)
				434	{
				435	long *committed;
				436
				437	committed = &per_cpu(committed_space, (long)hcpu);
				438	if (action == CPU_DEAD) {
				439	atomic_add(*committed, &vm_committed_space);
				440	*committed = 0;
				441	lru_drain_cache((long)hcpu);
				442	}
				443	return NOTIFY_OK;
				444	}
				445	#endif /* CONFIG_HOTPLUG_CPU */
				446	#endif /* CONFIG_SMP */
				447
				448	#ifdef CONFIG_SMP
				449	void percpu_counter_mod(struct percpu_counter *fbc, long amount)
				450	{
				451	long count;
				452	long *pcount;
				453	int cpu = get_cpu();
				454
				455	pcount = per_cpu_ptr(fbc->counters, cpu);
				456	count = *pcount + amount;
				457	if (count >= FBC_BATCH \|\| count <= -FBC_BATCH) {
				458	spin_lock(&fbc->lock);
				459	fbc->count += count;
				460	spin_unlock(&fbc->lock);
				461	count = 0;
				462	}
				463	*pcount = count;
				464	put_cpu();
				465	}
				466	EXPORT_SYMBOL(percpu_counter_mod);
				467	#endif
				468
				469	/*
				470	* Perform any setup for the swap system
				471	*/
				472	void __init swap_setup(void)
				473	{
				474	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
				475
				476	/* Use a smaller cluster for small-memory machines */
				477	if (megs < 16)
				478	page_cluster = 2;
				479	else
				480	page_cluster = 3;
				481	/*
				482	* Right now other parts of the system means that we
				483	* _really_ don't want to cluster much more
				484	*/
				485	hotcpu_notifier(cpu_swap_callback, 0);
				486	}