Blame - mm/swap.c - kernel/msm

blob: 96387e20184ab85971794daeb524ed07de6f8e18 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/mm/swap.c
				3	*
				4	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				5	*/
				6
				7	/*
				8	* This file contains the default values for the operation of the
				9	* Linux VM subsystem. Fine-tuning documentation can be found in
				10	* Documentation/sysctl/vm.txt.
				11	* Started 18.12.91
				12	* Swap aging added 23.2.95, Stephen Tweedie.
				13	* Buffermem limits added 12.3.98, Rik van Riel.
				14	*/
				15
				16	#include <linux/mm.h>
				17	#include <linux/sched.h>
				18	#include <linux/kernel_stat.h>
				19	#include <linux/swap.h>
				20	#include <linux/mman.h>
				21	#include <linux/pagemap.h>
				22	#include <linux/pagevec.h>
				23	#include <linux/init.h>
				24	#include <linux/module.h>
				25	#include <linux/mm_inline.h>
				26	#include <linux/buffer_head.h> /* for try_to_release_page() */
				27	#include <linux/module.h>
				28	#include <linux/percpu_counter.h>
				29	#include <linux/percpu.h>
				30	#include <linux/cpu.h>
				31	#include <linux/notifier.h>
				32	#include <linux/init.h>
				33
				34	/* How many pages do we try to swap or page in/out together? */
				35	int page_cluster;
				36
				37	#ifdef CONFIG_HUGETLB_PAGE
				38
				39	void put_page(struct page *page)
				40	{
					/*
					 * Compound page: drop the reference on the page found via
					 * page_private(); when that count reaches zero, call the
					 * destructor whose pointer is stored in page[1].mapping.
					 */
				41	if (unlikely(PageCompound(page))) {
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	42	page = (struct page *)page_private(page);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43	if (put_page_testzero(page)) {
				44	void (*dtor)(struct page *page);
				45
				46	dtor = (void (*)(struct page *))page[1].mapping;
				47	(*dtor)(page);
				48	}
				49	return;
				50	}
					/* Ordinary page: release it once the last reference is dropped. */
Nick Piggin	b581003	2005-10-29 18:16:12 -0700	[diff] [blame]	51	if (put_page_testzero(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	__page_cache_release(page);
				53	}
				54	EXPORT_SYMBOL(put_page);
				55	#endif
				56
				57	/*
				58	* Writeback is about to end against a page which has been marked for immediate
				59	* reclaim. If it still appears to be reclaimable, move it to the tail of the
				60	* inactive list. The page still has PageWriteback set, which will pin it.
				61	*
				62	* We don't expect many pages to come through here, so don't bother batching
				63	* things up.
				64	*
				65	* To avoid placing the page at the tail of the LRU while PG_writeback is still
				66	* set, this function will clear PG_writeback before performing the page
				67	* motion. Do that inside the lru lock because once PG_writeback is cleared
				68	* we may not touch the page.
				69	*
				70	* Returns zero if it cleared PG_writeback.
				71	*/
				72	int rotate_reclaimable_page(struct page *page)
				73	{
				74	struct zone *zone;
				75	unsigned long flags;
				76
					/* Checks made before taking zone->lru_lock; any failure returns 1. */
				77	if (PageLocked(page))
				78	return 1;
				79	if (PageDirty(page))
				80	return 1;
				81	if (PageActive(page))
				82	return 1;
				83	if (!PageLRU(page))
				84	return 1;
				85
				86	zone = page_zone(page);
				87	spin_lock_irqsave(&zone->lru_lock, flags);
					/* Re-test the LRU/active state now that the lock is held. */
				88	if (PageLRU(page) && !PageActive(page)) {
				89	list_del(&page->lru);
				90	list_add_tail(&page->lru, &zone->inactive_list);
				91	inc_page_state(pgrotated);
				92	}
					/* PG_writeback must still have been set here; BUG otherwise. */
				93	if (!test_clear_page_writeback(page))
				94	BUG();
				95	spin_unlock_irqrestore(&zone->lru_lock, flags);
				96	return 0;
				97	}
				98
				99	/*
				100	* FIXME: speed this up?
				101	*/
				102	void fastcall activate_page(struct page *page)
				103	{
				104	struct zone *zone = page_zone(page);
				105
				106	spin_lock_irq(&zone->lru_lock);
					/*
					 * Only a page that is on the LRU and not already active is
					 * moved from the inactive to the active list; any other page
					 * is left untouched.
					 */
				107	if (PageLRU(page) && !PageActive(page)) {
				108	del_page_from_inactive_list(zone, page);
				109	SetPageActive(page);
				110	add_page_to_active_list(zone, page);
				111	inc_page_state(pgactivate);
				112	}
				113	spin_unlock_irq(&zone->lru_lock);
				114	}
				115
				116	/*
				117	* Mark a page as having seen activity.
				118	*
				119	* inactive,unreferenced -> inactive,referenced
				120	* inactive,referenced -> active,unreferenced
				121	* active,unreferenced -> active,referenced
				122	*/
				123	void fastcall mark_page_accessed(struct page *page)
				124	{
					/* inactive + referenced + on the LRU: promote and reset the referenced bit */
				125	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
				126	activate_page(page);
				127	ClearPageReferenced(page);
					/* otherwise just record the reference if it is not recorded yet */
				128	} else if (!PageReferenced(page)) {
				129	SetPageReferenced(page);
				130	}
				131	}
				132
				133	EXPORT_SYMBOL(mark_page_accessed);
				134
				135	/**
				136	* lru_cache_add: add a page to the page lists
				137	* @page: the page to add
				138	*/
				139	static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
				140	static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
				141
				142	void fastcall lru_cache_add(struct page *page)
				143	{
				144	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				145
					/*
					 * Queue the page on this CPU's lru_add_pvecs. page_cache_get()
					 * presumably takes the reference that __pagevec_lru_add() later
					 * drops through release_pages() - confirm in linux/pagemap.h.
					 */
				146	page_cache_get(page);
					/* A zero return from pagevec_add() triggers the flush (presumably: pagevec full). */
				147	if (!pagevec_add(pvec, page))
				148	__pagevec_lru_add(pvec);
				149	put_cpu_var(lru_add_pvecs);
				150	}
				151
				152	void fastcall lru_cache_add_active(struct page *page)
				153	{
				154	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
				155
					/*
					 * Same as lru_cache_add(), but queues on this CPU's
					 * lru_add_active_pvecs and flushes with __pagevec_lru_add_active().
					 */
				156	page_cache_get(page);
				157	if (!pagevec_add(pvec, page))
				158	__pagevec_lru_add_active(pvec);
				159	put_cpu_var(lru_add_active_pvecs);
				160	}
				161
				162	void lru_add_drain(void)
				163	{
				164	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				165
					/* Flush this CPU's pending inactive-list additions, if any. */
				166	if (pagevec_count(pvec))
				167	__pagevec_lru_add(pvec);
					/*
					 * The second pagevec is fetched with __get_cpu_var() inside the
					 * get_cpu_var()/put_cpu_var() section opened above, so a single
					 * put_cpu_var() closes it.
					 */
				168	pvec = &__get_cpu_var(lru_add_active_pvecs);
				169	if (pagevec_count(pvec))
				170	__pagevec_lru_add_active(pvec);
				171	put_cpu_var(lru_add_pvecs);
				172	}
				173
				174	/*
				175	* This path almost never happens for VM activity - pages are normally
				176	* freed via pagevecs. But it gets used by networking.
				177	*/
				178	void fastcall __page_cache_release(struct page *page)
				179	{
				180	unsigned long flags;
				181	struct zone *zone = page_zone(page);
				182
				183	spin_lock_irqsave(&zone->lru_lock, flags);
					/* Take the page off the LRU if it was on it. */
				184	if (TestClearPageLRU(page))
				185	del_page_from_lru(zone, page);
					/*
					 * Recheck the count under the lock: if it is no longer zero,
					 * clear the local pointer so the free below is skipped.
					 */
				186	if (page_count(page) != 0)
				187	page = NULL;
				188	spin_unlock_irqrestore(&zone->lru_lock, flags);
				189	if (page)
				190	free_hot_page(page);
				191	}
				192
				193	EXPORT_SYMBOL(__page_cache_release);
				194
				195	/*
				196	* Batched page_cache_release(). Decrement the reference count on all the
				197	* passed pages. If it fell to zero then remove the page from the LRU and
				198	* free it.
				199	*
				200	* Avoid taking zone->lru_lock if possible, but if it is taken, retain it
				201	* for the remainder of the operation.
				202	*
				203	* The locking in this function is against shrink_cache(): we recheck the
				204	* page count inside the lock to see whether shrink_cache grabbed the page
				205	* via the LRU. If it did, give up: shrink_cache will free it.
				206	*/
				207	void release_pages(struct page **pages, int nr, int cold)
				208	{
				209	int i;
				210	struct pagevec pages_to_free;
					/* zone whose lru_lock is currently held; NULL means no lock is held */
				211	struct zone *zone = NULL;
				212
				213	pagevec_init(&pages_to_free, cold);
				214	for (i = 0; i < nr; i++) {
				215	struct page *page = pages[i];
				216	struct zone *pagezone;
				217
					/* Pages that still have other references need no further work. */
Nick Piggin	b581003	2005-10-29 18:16:12 -0700	[diff] [blame]	218	if (!put_page_testzero(page))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	219	continue;
				220
				221	pagezone = page_zone(page);
					/* Switch locks only when this page's zone differs from the locked one. */
				222	if (pagezone != zone) {
				223	if (zone)
				224	spin_unlock_irq(&zone->lru_lock);
				225	zone = pagezone;
				226	spin_lock_irq(&zone->lru_lock);
				227	}
				228	if (TestClearPageLRU(page))
				229	del_page_from_lru(zone, page);
					/* Recheck the count under the lock before queueing the page for freeing. */
				230	if (page_count(page) == 0) {
					/*
					 * pagevec_add() returned zero (presumably: batch full), so
					 * drop the lock, free the batch and start a fresh one.
					 */
				231	if (!pagevec_add(&pages_to_free, page)) {
				232	spin_unlock_irq(&zone->lru_lock);
				233	__pagevec_free(&pages_to_free);
				234	pagevec_reinit(&pages_to_free);
				235	zone = NULL; /* No lock is held */
				236	}
				237	}
				238	}
				239	if (zone)
				240	spin_unlock_irq(&zone->lru_lock);
				241
					/* Free whatever is left in the final, partially filled batch. */
				242	pagevec_free(&pages_to_free);
				243	}
				244
				245	/*
				246	* The pages which we're about to release may be in the deferred lru-addition
				247	* queues. That would prevent them from really being freed right now. That's
				248	* OK from a correctness point of view but is inefficient - those pages may be
				249	* cache-warm and we want to give them back to the page allocator ASAP.
				250	*
				251	* So __pagevec_release() will drain those queues here. __pagevec_lru_add()
				252	* and __pagevec_lru_add_active() call release_pages() directly to avoid
				253	* mutual recursion.
				254	*/
				255	void __pagevec_release(struct pagevec *pvec)
				256	{
					/* Flush this CPU's deferred lru-add pagevecs before dropping references. */
				257	lru_add_drain();
				258	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
					/* Leave the caller's pagevec reinitialised for reuse. */
				259	pagevec_reinit(pvec);
				260	}
				261
				262	/*
				263	* pagevec_release() for pages which are known to not be on the LRU
				264	*
				265	* This function reinitialises the caller's pagevec.
				266	*/
				267	void __pagevec_release_nonlru(struct pagevec *pvec)
				268	{
				269	int i;
				270	struct pagevec pages_to_free;
				271
				272	pagevec_init(&pages_to_free, pvec->cold);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	273	for (i = 0; i < pagevec_count(pvec); i++) {
				274	struct page *page = pvec->pages[i];
				275
					/* Callers promise these pages are not on the LRU; enforce it. */
				276	BUG_ON(PageLRU(page));
					/* Collect pages whose count dropped to zero; they are freed in one batch below. */
				277	if (put_page_testzero(page))
				278	pagevec_add(&pages_to_free, page);
				279	}
				280	pagevec_free(&pages_to_free);
				281	pagevec_reinit(pvec);
				282	}
				283
				284	/*
				285	* Add the passed pages to the LRU, then drop the caller's refcount
				286	* on them. Reinitialises the caller's pagevec.
				287	*/
				288	void __pagevec_lru_add(struct pagevec *pvec)
				289	{
				290	int i;
					/* zone whose lru_lock is currently held; NULL means none */
				291	struct zone *zone = NULL;
				292
				293	for (i = 0; i < pagevec_count(pvec); i++) {
				294	struct page *page = pvec->pages[i];
				295	struct zone *pagezone = page_zone(page);
				296
					/* Hold one zone's lru_lock at a time, switching only when the zone changes. */
				297	if (pagezone != zone) {
				298	if (zone)
				299	spin_unlock_irq(&zone->lru_lock);
				300	zone = pagezone;
				301	spin_lock_irq(&zone->lru_lock);
				302	}
					/* A page that already has PG_lru set here is a bug. */
				303	if (TestSetPageLRU(page))
				304	BUG();
				305	add_page_to_inactive_list(zone, page);
				306	}
				307	if (zone)
				308	spin_unlock_irq(&zone->lru_lock);
					/* Drop the references held by the pagevec, then reset it. */
				309	release_pages(pvec->pages, pvec->nr, pvec->cold);
				310	pagevec_reinit(pvec);
				311	}
				312
				313	EXPORT_SYMBOL(__pagevec_lru_add);
				314
				315	void __pagevec_lru_add_active(struct pagevec *pvec)
				316	{
				317	int i;
					/* zone whose lru_lock is currently held; NULL means none */
				318	struct zone *zone = NULL;
				319
					/*
					 * Same walk as __pagevec_lru_add(), but each page is also marked
					 * active and goes onto the zone's active list.
					 */
				320	for (i = 0; i < pagevec_count(pvec); i++) {
				321	struct page *page = pvec->pages[i];
				322	struct zone *pagezone = page_zone(page);
				323
				324	if (pagezone != zone) {
				325	if (zone)
				326	spin_unlock_irq(&zone->lru_lock);
				327	zone = pagezone;
				328	spin_lock_irq(&zone->lru_lock);
				329	}
					/* Neither PG_lru nor PG_active may already be set on the page. */
				330	if (TestSetPageLRU(page))
				331	BUG();
				332	if (TestSetPageActive(page))
				333	BUG();
				334	add_page_to_active_list(zone, page);
				335	}
				336	if (zone)
				337	spin_unlock_irq(&zone->lru_lock);
					/* Drop the references held by the pagevec, then reset it. */
				338	release_pages(pvec->pages, pvec->nr, pvec->cold);
				339	pagevec_reinit(pvec);
				340	}
				341
				342	/*
				343	* Try to drop buffers from the pages in a pagevec
				344	*/
				345	void pagevec_strip(struct pagevec *pvec)
				346	{
				347	int i;
				348
				349	for (i = 0; i < pagevec_count(pvec); i++) {
				350	struct page *page = pvec->pages[i];
				351
					/*
					 * Only pages with PagePrivate set, and whose lock bit we manage
					 * to set here, are tried. The result of try_to_release_page()
					 * is ignored: this is best effort.
					 */
				352	if (PagePrivate(page) && !TestSetPageLocked(page)) {
				353	try_to_release_page(page, 0);
				354	unlock_page(page);
				355	}
				356	}
				357	}
				358
				359	/**
				360	* pagevec_lookup - gang pagecache lookup
				361	* @pvec: Where the resulting pages are placed
				362	* @mapping: The address_space to search
				363	* @start: The starting page index
				364	* @nr_pages: The maximum number of pages
				365	*
				366	* pagevec_lookup() will search for and return a group of up to @nr_pages pages
				367	* in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a
				368	* reference against the pages in @pvec.
				369	*
				370	* The search returns a group of mapping-contiguous pages with ascending
				371	* indexes. There may be holes in the indices due to not-present pages.
				372	*
				373	* pagevec_lookup() returns the number of pages which were found.
				374	*/
				375	unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
				376	pgoff_t start, unsigned nr_pages)
				377	{
					/* find_get_pages() fills pvec->pages; its return value becomes the count. */
				378	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
				379	return pagevec_count(pvec);
				380	}
				381
				382	unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
				383	pgoff_t *index, int tag, unsigned nr_pages)
				384	{
					/*
					 * Tagged variant of pagevec_lookup(): delegates to
					 * find_get_pages_tag(), passing @index by pointer, and returns
					 * the number of pages placed in @pvec.
					 */
				385	pvec->nr = find_get_pages_tag(mapping, index, tag,
				386	nr_pages, pvec->pages);
				387	return pagevec_count(pvec);
				388	}
				389
				390
				391	#ifdef CONFIG_SMP
				392	/*
				393	* We tolerate a little inaccuracy to avoid ping-ponging the counter between
				394	* CPUs
				395	*/
				396	#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
				397
				398	static DEFINE_PER_CPU(long, committed_space) = 0;
				399
				400	void vm_acct_memory(long pages)
				401	{
				402	long *local;
				403
					/*
					 * Accumulate @pages in this CPU's committed_space counter and
					 * fold it into the global vm_committed_space only once its
					 * magnitude exceeds ACCT_THRESHOLD.
					 */
				404	preempt_disable();
				405	local = &__get_cpu_var(committed_space);
				406	*local += pages;
					/* Compare the counter's value, not the pointer. */
				407	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
				408	atomic_add(*local, &vm_committed_space);
				409	*local = 0;
				410	}
				411	preempt_enable();
				412	}
				413	EXPORT_SYMBOL(vm_acct_memory);
				414
				415	#ifdef CONFIG_HOTPLUG_CPU
				416	static void lru_drain_cache(unsigned int cpu)
				417	{
					/* Flush the deferred lru-add pagevecs that belong to @cpu. */
				418	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
				419
				420	/* CPU is dead, so no locking needed. */
				421	if (pagevec_count(pvec))
				422	__pagevec_lru_add(pvec);
				423	pvec = &per_cpu(lru_add_active_pvecs, cpu);
				424	if (pagevec_count(pvec))
				425	__pagevec_lru_add_active(pvec);
				426	}
				427
				428	/* Drop the CPU's cached committed space back into the central pool. */
				429	static int cpu_swap_callback(struct notifier_block *nfb,
				430	unsigned long action,
				431	void *hcpu)
				432	{
				433	long *committed;
				434
					/* @hcpu carries the CPU number, cast back to an integer here. */
				435	committed = &per_cpu(committed_space, (long)hcpu);
					/* Only CPU_DEAD is acted on; every action returns NOTIFY_OK. */
				436	if (action == CPU_DEAD) {
				437	atomic_add(*committed, &vm_committed_space);
				438	*committed = 0;
				439	lru_drain_cache((long)hcpu);
				440	}
				441	return NOTIFY_OK;
				442	}
				443	#endif /* CONFIG_HOTPLUG_CPU */
				444	#endif /* CONFIG_SMP */
				445
				446	#ifdef CONFIG_SMP
				447	void percpu_counter_mod(struct percpu_counter *fbc, long amount)
				448	{
				449	long count;
				450	long *pcount;
				451	int cpu = get_cpu();
				452
					/*
					 * Add @amount to this CPU's slot of @fbc. Once the local value
					 * reaches +/-FBC_BATCH it is folded into fbc->count under
					 * fbc->lock and the slot is reset to zero.
					 */
				453	pcount = per_cpu_ptr(fbc->counters, cpu);
				454	count = *pcount + amount;
				455	if (count >= FBC_BATCH || count <= -FBC_BATCH) {
				456	spin_lock(&fbc->lock);
				457	fbc->count += count;
				458	spin_unlock(&fbc->lock);
				459	count = 0;
				460	}
				461	*pcount = count;
				462	put_cpu();
				463	}
				464	EXPORT_SYMBOL(percpu_counter_mod);
				465	#endif
				466
				467	/*
				468	* Perform any setup for the swap system
				469	*/
				470	void __init swap_setup(void)
				471	{
					/* Shift the physical page count down to megabytes. */
				472	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
				473
				474	/* Use a smaller cluster for small-memory machines */
				475	if (megs < 16)
				476	page_cluster = 2;
				477	else
				478	page_cluster = 3;
				479	/*
				480	* Right now other parts of the system means that we
				481	* _really_ don't want to cluster much more
				482	*/
					/*
					 * NOTE(review): cpu_swap_callback is only defined under
					 * CONFIG_SMP && CONFIG_HOTPLUG_CPU; presumably hotcpu_notifier()
					 * expands to nothing otherwise - confirm in linux/cpu.h.
					 */
				483	hotcpu_notifier(cpu_swap_callback, 0);
				484	}