Blame - mm/swap.c - kernel/msm-4.9

blob: 7771d2803f62a40402409733cadaebf1a72ead45 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/mm/swap.c
				3	*
				4	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				5	*/
				6
				7	/*
				8	* This file contains the default values for the operation of the
				9	* Linux VM subsystem. Fine-tuning documentation can be found in
				10	* Documentation/sysctl/vm.txt.
				11	* Started 18.12.91
				12	* Swap aging added 23.2.95, Stephen Tweedie.
				13	* Buffermem limits added 12.3.98, Rik van Riel.
				14	*/
				15
				16	#include <linux/mm.h>
				17	#include <linux/sched.h>
				18	#include <linux/kernel_stat.h>
				19	#include <linux/swap.h>
				20	#include <linux/mman.h>
				21	#include <linux/pagemap.h>
				22	#include <linux/pagevec.h>
				23	#include <linux/init.h>
				24	#include <linux/module.h>
				25	#include <linux/mm_inline.h>
				26	#include <linux/buffer_head.h> /* for try_to_release_page() */
				27	#include <linux/module.h>
				28	#include <linux/percpu_counter.h>
				29	#include <linux/percpu.h>
				30	#include <linux/cpu.h>
				31	#include <linux/notifier.h>
				32	#include <linux/init.h>
				33
				34	/* How many pages do we try to swap or page in/out together?  Picked by swap_setup() from the amount of physical memory. */
				35	int page_cluster;
				36
				37	#ifdef CONFIG_HUGETLB_PAGE
				38
				39	void put_page(struct page *page)
				40	{
				41	if (unlikely(PageCompound(page))) {
				42	page = (struct page *)page->private;
				43	if (put_page_testzero(page)) {
				44	void (dtor)(struct page page);
				45
				46	dtor = (void ()(struct page ))page[1].mapping;
				47	(*dtor)(page);
				48	}
				49	return;
				50	}
				51	if (!PageReserved(page) && put_page_testzero(page))
				52	__page_cache_release(page);
				53	}
				54	EXPORT_SYMBOL(put_page);
				55	#endif
				56
				57	/*
				58	* Writeback is about to end against a page which has been marked for immediate
				59	* reclaim. If it still appears to be reclaimable, move it to the tail of the
				60	* inactive list. The page still has PageWriteback set, which will pin it.
				61	*
				62	* We don't expect many pages to come through here, so don't bother batching
				63	* things up.
				64	*
				65	* To avoid placing the page at the tail of the LRU while PG_writeback is still
				66	* set, this function will clear PG_writeback before performing the page
				67	* motion. Do that inside the lru lock because once PG_writeback is cleared
				68	* we may not touch the page.
				69	*
				70	* Returns zero if it cleared PG_writeback.
				71	*/
				72	int rotate_reclaimable_page(struct page *page)
				73	{
				74	struct zone *zone;
				75	unsigned long flags;
				76
				77	if (PageLocked(page))
				78	return 1;
				79	if (PageDirty(page))
				80	return 1;
				81	if (PageActive(page))
				82	return 1;
				83	if (!PageLRU(page))
				84	return 1;
				85
				86	zone = page_zone(page);
				87	spin_lock_irqsave(&zone->lru_lock, flags);
				88	if (PageLRU(page) && !PageActive(page)) {
				89	list_del(&page->lru);
				90	list_add_tail(&page->lru, &zone->inactive_list);
				91	inc_page_state(pgrotated);
				92	}
				93	if (!test_clear_page_writeback(page))
				94	BUG();
				95	spin_unlock_irqrestore(&zone->lru_lock, flags);
				96	return 0;
				97	}
				98
				99	/*
				100	* FIXME: speed this up?
				101	*/
				102	void fastcall activate_page(struct page *page)
				103	{
				104	struct zone *zone = page_zone(page);
				105
				106	spin_lock_irq(&zone->lru_lock);
				107	if (PageLRU(page) && !PageActive(page)) {
				108	del_page_from_inactive_list(zone, page);
				109	SetPageActive(page);
				110	add_page_to_active_list(zone, page);
				111	inc_page_state(pgactivate);
				112	}
				113	spin_unlock_irq(&zone->lru_lock);
				114	}
				115
				116	/*
				117	* Mark a page as having seen activity.
				118	*
				119	* inactive,unreferenced -> inactive,referenced
				120	* inactive,referenced -> active,unreferenced
				121	* active,unreferenced -> active,referenced
				122	*/
				123	void fastcall mark_page_accessed(struct page *page)
				124	{
				125	if (!PageActive(page) && PageReferenced(page) && PageLRU(page)) {
				126	activate_page(page);
				127	ClearPageReferenced(page);
				128	} else if (!PageReferenced(page)) {
				129	SetPageReferenced(page);
				130	}
				131	}
				132
				133	EXPORT_SYMBOL(mark_page_accessed);
				134
				135	/**
				136	* lru_cache_add: add a page to the page lists
				137	* @page: the page to add
				138	*/
				139	static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
				140	static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
				141
				142	void fastcall lru_cache_add(struct page *page)
				143	{
				144	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				145
				146	page_cache_get(page);
				147	if (!pagevec_add(pvec, page))
				148	__pagevec_lru_add(pvec);
				149	put_cpu_var(lru_add_pvecs);
				150	}
				151
				152	void fastcall lru_cache_add_active(struct page *page)
				153	{
				154	struct pagevec *pvec = &get_cpu_var(lru_add_active_pvecs);
				155
				156	page_cache_get(page);
				157	if (!pagevec_add(pvec, page))
				158	__pagevec_lru_add_active(pvec);
				159	put_cpu_var(lru_add_active_pvecs);
				160	}
				161
				162	void lru_add_drain(void)
				163	{
				164	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
				165
				166	if (pagevec_count(pvec))
				167	__pagevec_lru_add(pvec);
				168	pvec = &__get_cpu_var(lru_add_active_pvecs);
				169	if (pagevec_count(pvec))
				170	__pagevec_lru_add_active(pvec);
				171	put_cpu_var(lru_add_pvecs);
				172	}
				173
				174	/*
				175	* This path almost never happens for VM activity - pages are normally
				176	* freed via pagevecs. But it gets used by networking.
				177	*/
				178	void fastcall __page_cache_release(struct page *page)
				179	{
				180	unsigned long flags;
				181	struct zone *zone = page_zone(page);
				182
				183	spin_lock_irqsave(&zone->lru_lock, flags);
				184	if (TestClearPageLRU(page))
				185	del_page_from_lru(zone, page);
				186	if (page_count(page) != 0)
				187	page = NULL;
				188	spin_unlock_irqrestore(&zone->lru_lock, flags);
				189	if (page)
				190	free_hot_page(page);
				191	}
				192
				193	EXPORT_SYMBOL(__page_cache_release);
				194
				195	/*
				196	* Batched page_cache_release(). Decrement the reference count on all the
				197	* passed pages. If it fell to zero then remove the page from the LRU and
				198	* free it.
				199	*
				200	* Avoid taking zone->lru_lock if possible, but if it is taken, retain it
				201	* for the remainder of the operation.
				202	*
				203	* The locking in this function is against shrink_cache(): we recheck the
				204	* page count inside the lock to see whether shrink_cache grabbed the page
				205	* via the LRU. If it did, give up: shrink_cache will free it.
				206	*/
				207	void release_pages(struct page **pages, int nr, int cold)
				208	{
				209	int i;
				210	struct pagevec pages_to_free;
				211	struct zone *zone = NULL;
				212
				213	pagevec_init(&pages_to_free, cold);
				214	for (i = 0; i < nr; i++) {
				215	struct page *page = pages[i];
				216	struct zone *pagezone;
				217
				218	if (PageReserved(page) \|\| !put_page_testzero(page))
				219	continue;
				220
				221	pagezone = page_zone(page);
				222	if (pagezone != zone) {
				223	if (zone)
				224	spin_unlock_irq(&zone->lru_lock);
				225	zone = pagezone;
				226	spin_lock_irq(&zone->lru_lock);
				227	}
				228	if (TestClearPageLRU(page))
				229	del_page_from_lru(zone, page);
				230	if (page_count(page) == 0) {
				231	if (!pagevec_add(&pages_to_free, page)) {
				232	spin_unlock_irq(&zone->lru_lock);
				233	__pagevec_free(&pages_to_free);
				234	pagevec_reinit(&pages_to_free);
				235	zone = NULL; /* No lock is held */
				236	}
				237	}
				238	}
				239	if (zone)
				240	spin_unlock_irq(&zone->lru_lock);
				241
				242	pagevec_free(&pages_to_free);
				243	}
				244
				245	/*
				246	* The pages which we're about to release may be in the deferred lru-addition
				247	* queues. That would prevent them from really being freed right now. That's
				248	* OK from a correctness point of view but is inefficient - those pages may be
				249	* cache-warm and we want to give them back to the page allocator ASAP.
				250	*
				251	* So __pagevec_release() will drain those queues here. __pagevec_lru_add()
				252	* and __pagevec_lru_add_active() call release_pages() directly to avoid
				253	* mutual recursion.
				254	*/
				255	void __pagevec_release(struct pagevec *pvec)
				256	{
				257	lru_add_drain();
				258	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
				259	pagevec_reinit(pvec);
				260	}
				261
				262	/*
				263	* pagevec_release() for pages which are known to not be on the LRU
				264	*
				265	* This function reinitialises the caller's pagevec.
				266	*/
				267	void __pagevec_release_nonlru(struct pagevec *pvec)
				268	{
				269	int i;
				270	struct pagevec pages_to_free;
				271
				272	pagevec_init(&pages_to_free, pvec->cold);
				273	pages_to_free.cold = pvec->cold;
				274	for (i = 0; i < pagevec_count(pvec); i++) {
				275	struct page *page = pvec->pages[i];
				276
				277	BUG_ON(PageLRU(page));
				278	if (put_page_testzero(page))
				279	pagevec_add(&pages_to_free, page);
				280	}
				281	pagevec_free(&pages_to_free);
				282	pagevec_reinit(pvec);
				283	}
				284
				285	/*
				286	* Add the passed pages to the LRU, then drop the caller's refcount
				287	* on them. Reinitialises the caller's pagevec.
				288	*/
				289	void __pagevec_lru_add(struct pagevec *pvec)
				290	{
				291	int i;
				292	struct zone *zone = NULL;
				293
				294	for (i = 0; i < pagevec_count(pvec); i++) {
				295	struct page *page = pvec->pages[i];
				296	struct zone *pagezone = page_zone(page);
				297
				298	if (pagezone != zone) {
				299	if (zone)
				300	spin_unlock_irq(&zone->lru_lock);
				301	zone = pagezone;
				302	spin_lock_irq(&zone->lru_lock);
				303	}
				304	if (TestSetPageLRU(page))
				305	BUG();
				306	add_page_to_inactive_list(zone, page);
				307	}
				308	if (zone)
				309	spin_unlock_irq(&zone->lru_lock);
				310	release_pages(pvec->pages, pvec->nr, pvec->cold);
				311	pagevec_reinit(pvec);
				312	}
				313
				314	EXPORT_SYMBOL(__pagevec_lru_add);
				315
				316	void __pagevec_lru_add_active(struct pagevec *pvec)
				317	{
				318	int i;
				319	struct zone *zone = NULL;
				320
				321	for (i = 0; i < pagevec_count(pvec); i++) {
				322	struct page *page = pvec->pages[i];
				323	struct zone *pagezone = page_zone(page);
				324
				325	if (pagezone != zone) {
				326	if (zone)
				327	spin_unlock_irq(&zone->lru_lock);
				328	zone = pagezone;
				329	spin_lock_irq(&zone->lru_lock);
				330	}
				331	if (TestSetPageLRU(page))
				332	BUG();
				333	if (TestSetPageActive(page))
				334	BUG();
				335	add_page_to_active_list(zone, page);
				336	}
				337	if (zone)
				338	spin_unlock_irq(&zone->lru_lock);
				339	release_pages(pvec->pages, pvec->nr, pvec->cold);
				340	pagevec_reinit(pvec);
				341	}
				342
				343	/*
				344	* Try to drop buffers from the pages in a pagevec
				345	*/
				346	void pagevec_strip(struct pagevec *pvec)
				347	{
				348	int i;
				349
				350	for (i = 0; i < pagevec_count(pvec); i++) {
				351	struct page *page = pvec->pages[i];
				352
				353	if (PagePrivate(page) && !TestSetPageLocked(page)) {
				354	try_to_release_page(page, 0);
				355	unlock_page(page);
				356	}
				357	}
				358	}
				359
				360	/**
				361	* pagevec_lookup - gang pagecache lookup
				362	* @pvec: Where the resulting pages are placed
				363	* @mapping: The address_space to search
				364	* @start: The starting page index
				365	* @nr_pages: The maximum number of pages
				366	*
				367	* pagevec_lookup() will search for and return a group of up to @nr_pages pages
				368	* in the mapping. The pages are placed in @pvec. pagevec_lookup() takes a
				369	* reference against the pages in @pvec.
				370	*
				371	* The search returns a group of mapping-contiguous pages with ascending
				372	* indexes. There may be holes in the indices due to not-present pages.
				373	*
				374	* pagevec_lookup() returns the number of pages which were found.
				375	*/
				376	unsigned pagevec_lookup(struct pagevec pvec, struct address_space mapping,
				377	pgoff_t start, unsigned nr_pages)
				378	{
				379	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
				380	return pagevec_count(pvec);
				381	}
				382
				383	unsigned pagevec_lookup_tag(struct pagevec pvec, struct address_space mapping,
				384	pgoff_t *index, int tag, unsigned nr_pages)
				385	{
				386	pvec->nr = find_get_pages_tag(mapping, index, tag,
				387	nr_pages, pvec->pages);
				388	return pagevec_count(pvec);
				389	}
				390
				391
				392	#ifdef CONFIG_SMP
				393	/*
				394	* We tolerate a little inaccuracy to avoid ping-ponging the counter between
				395	* CPUs
				396	*/
				397	#define ACCT_THRESHOLD max(16, NR_CPUS * 2)
				398
				399	static DEFINE_PER_CPU(long, committed_space) = 0;
				400
				401	void vm_acct_memory(long pages)
				402	{
				403	long *local;
				404
				405	preempt_disable();
				406	local = &__get_cpu_var(committed_space);
				407	*local += pages;
				408	if (local > ACCT_THRESHOLD \|\| local < -ACCT_THRESHOLD) {
				409	atomic_add(*local, &vm_committed_space);
				410	*local = 0;
				411	}
				412	preempt_enable();
				413	}
				414	EXPORT_SYMBOL(vm_acct_memory);
				415
				416	#ifdef CONFIG_HOTPLUG_CPU
				417	static void lru_drain_cache(unsigned int cpu)
				418	{
				419	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
				420
				421	/* CPU is dead, so no locking needed. */
				422	if (pagevec_count(pvec))
				423	__pagevec_lru_add(pvec);
				424	pvec = &per_cpu(lru_add_active_pvecs, cpu);
				425	if (pagevec_count(pvec))
				426	__pagevec_lru_add_active(pvec);
				427	}
				428
				429	/* Drop the CPU's cached committed space back into the central pool. */
				430	static int cpu_swap_callback(struct notifier_block *nfb,
				431	unsigned long action,
				432	void *hcpu)
				433	{
				434	long *committed;
				435
				436	committed = &per_cpu(committed_space, (long)hcpu);
				437	if (action == CPU_DEAD) {
				438	atomic_add(*committed, &vm_committed_space);
				439	*committed = 0;
				440	lru_drain_cache((long)hcpu);
				441	}
				442	return NOTIFY_OK;
				443	}
				444	#endif /* CONFIG_HOTPLUG_CPU */
				445	#endif /* CONFIG_SMP */
				446
				447	#ifdef CONFIG_SMP
				448	void percpu_counter_mod(struct percpu_counter *fbc, long amount)
				449	{
				450	long count;
				451	long *pcount;
				452	int cpu = get_cpu();
				453
				454	pcount = per_cpu_ptr(fbc->counters, cpu);
				455	count = *pcount + amount;
				456	if (count >= FBC_BATCH \|\| count <= -FBC_BATCH) {
				457	spin_lock(&fbc->lock);
				458	fbc->count += count;
				459	spin_unlock(&fbc->lock);
				460	count = 0;
				461	}
				462	*pcount = count;
				463	put_cpu();
				464	}
				465	EXPORT_SYMBOL(percpu_counter_mod);
				466	#endif
				467
				468	/*
				469	* Perform any setup for the swap system
				470	*/
				471	void __init swap_setup(void)
				472	{
				473	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
				474
				475	/* Use a smaller cluster for small-memory machines */
				476	if (megs < 16)
				477	page_cluster = 2;
				478	else
				479	page_cluster = 3;
				480	/*
				481	* Right now other parts of the system means that we
				482	* _really_ don't want to cluster much more
				483	*/
				484	hotcpu_notifier(cpu_swap_callback, 0);
				485	}