blob: 7497041d0631a89e4383eb29d43b2179bc6305cd [file] [log] [blame]
/******************************************************************************
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
32
33#include <linux/kernel.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070034#include <linux/sched.h>
35#include <linux/errno.h>
36#include <linux/mm.h>
37#include <linux/bootmem.h>
38#include <linux/pagemap.h>
39#include <linux/highmem.h>
40#include <linux/mutex.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070041#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090042#include <linux/gfp.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070043
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070044#include <asm/page.h>
45#include <asm/pgalloc.h>
46#include <asm/pgtable.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070047#include <asm/tlb.h>
Jeremy Fitzhardinge66946f62010-09-14 10:32:32 -070048#include <asm/e820.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070049
Jeremy Fitzhardingeecbf29c2008-12-16 12:37:07 -080050#include <asm/xen/hypervisor.h>
51#include <asm/xen/hypercall.h>
Jeremy Fitzhardinge1ccbf532009-10-06 15:11:14 -070052
53#include <xen/xen.h>
Jeremy Fitzhardingeecbf29c2008-12-16 12:37:07 -080054#include <xen/interface/xen.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070055#include <xen/interface/memory.h>
Daniel De Graaf803eb042011-03-14 11:29:37 -040056#include <xen/balloon.h>
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070057#include <xen/features.h>
58#include <xen/page.h>
59
/*
 * Outcome of one balloon_process() step, as reported by the reservation
 * helpers and refined by update_schedule().
 */
enum bp_state {
	BP_DONE,	/* done or nothing to do */
	BP_EAGAIN,	/* error, go to sleep and retry later */
	BP_ECANCELED,	/* error, balloon operation canceled */
};
73
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070074
75static DEFINE_MUTEX(balloon_mutex);
76
Daniel De Graaf803eb042011-03-14 11:29:37 -040077struct balloon_stats balloon_stats;
78EXPORT_SYMBOL_GPL(balloon_stats);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070079
80/* We increase/decrease in batches which fit in a page */
81static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
82
/*
 * totalhigh_pages is only adjusted on CONFIG_HIGHMEM kernels; without
 * highmem these helpers expand to empty statements.
 */
#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages()	(totalhigh_pages++)
#define dec_totalhigh_pages()	(totalhigh_pages--)
#else
#define inc_totalhigh_pages()	do {} while (0)
#define dec_totalhigh_pages()	do {} while (0)
#endif
90
/*
 * Pages currently held in the balloon, linked through page->lru.
 * Lowmem pages are kept at the head, highmem pages at the tail.
 */
static LIST_HEAD(ballooned_pages);

/* Balloon worker; the work function is defined further down. */
static void balloon_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -070097
/*
 * Allocation flags used when ballooning out (allocating memory to return
 * to Xen): we don't really want the kernel to try too hard, since that
 * can trigger the oom killer.
 */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
102
/*
 * scrub_page: zero a page's contents before it is ballooned out.
 * Compiles to an empty function unless CONFIG_XEN_SCRUB_PAGES is set.
 */
#ifdef CONFIG_XEN_SCRUB_PAGES
static void scrub_page(struct page *page)
{
	clear_highpage(page);
}
#else
static void scrub_page(struct page *page)
{
}
#endif
109
110/* balloon_append: add the given page to the balloon. */
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700111static void __balloon_append(struct page *page)
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700112{
113 /* Lowmem is re-populated first, so highmem pages go at list tail. */
114 if (PageHighMem(page)) {
115 list_add_tail(&page->lru, &ballooned_pages);
116 balloon_stats.balloon_high++;
117 dec_totalhigh_pages();
118 } else {
119 list_add(&page->lru, &ballooned_pages);
120 balloon_stats.balloon_low++;
121 }
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700122}
Gianluca Guida3d65c942009-07-30 22:54:36 +0100123
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700124static void balloon_append(struct page *page)
125{
126 __balloon_append(page);
Gianluca Guida3d65c942009-07-30 22:54:36 +0100127 totalram_pages--;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700128}
129
130/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
131static struct page *balloon_retrieve(void)
132{
133 struct page *page;
134
135 if (list_empty(&ballooned_pages))
136 return NULL;
137
138 page = list_entry(ballooned_pages.next, struct page, lru);
139 list_del(&page->lru);
140
141 if (PageHighMem(page)) {
142 balloon_stats.balloon_high--;
143 inc_totalhigh_pages();
144 }
145 else
146 balloon_stats.balloon_low--;
147
Gianluca Guida3d65c942009-07-30 22:54:36 +0100148 totalram_pages++;
149
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700150 return page;
151}
152
153static struct page *balloon_first_page(void)
154{
155 if (list_empty(&ballooned_pages))
156 return NULL;
157 return list_entry(ballooned_pages.next, struct page, lru);
158}
159
160static struct page *balloon_next_page(struct page *page)
161{
162 struct list_head *next = page->lru.next;
163 if (next == &ballooned_pages)
164 return NULL;
165 return list_entry(next, struct page, lru);
166}
167
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100168static enum bp_state update_schedule(enum bp_state state)
169{
170 if (state == BP_DONE) {
171 balloon_stats.schedule_delay = 1;
172 balloon_stats.retry_count = 1;
173 return BP_DONE;
174 }
175
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100176 ++balloon_stats.retry_count;
177
178 if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
179 balloon_stats.retry_count > balloon_stats.max_retry_count) {
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100180 balloon_stats.schedule_delay = 1;
181 balloon_stats.retry_count = 1;
182 return BP_ECANCELED;
183 }
184
185 balloon_stats.schedule_delay <<= 1;
186
187 if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
188 balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
189
190 return BP_EAGAIN;
191}
192
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700193static unsigned long current_target(void)
194{
Ian Campbellbc2c0302009-06-05 11:58:37 +0100195 unsigned long target = balloon_stats.target_pages;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700196
197 target = min(target,
198 balloon_stats.current_pages +
199 balloon_stats.balloon_low +
200 balloon_stats.balloon_high);
201
202 return target;
203}
204
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100205static enum bp_state increase_reservation(unsigned long nr_pages)
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700206{
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100207 int rc;
Jeremy Fitzhardinge2f70e0a2010-09-02 23:11:17 -0700208 unsigned long pfn, i;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700209 struct page *page;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700210 struct xen_memory_reservation reservation = {
211 .address_bits = 0,
212 .extent_order = 0,
213 .domid = DOMID_SELF
214 };
215
216 if (nr_pages > ARRAY_SIZE(frame_list))
217 nr_pages = ARRAY_SIZE(frame_list);
218
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700219 page = balloon_first_page();
220 for (i = 0; i < nr_pages; i++) {
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100221 if (!page) {
222 nr_pages = i;
223 break;
224 }
Joe Perchesa419aef2009-08-18 11:18:35 -0700225 frame_list[i] = page_to_pfn(page);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700226 page = balloon_next_page(page);
227 }
228
Isaku Yamahataa90971e2008-05-26 23:31:14 +0100229 set_xen_guest_handle(reservation.extent_start, frame_list);
Jeremy Fitzhardingefde28e82008-07-24 16:28:00 -0700230 reservation.nr_extents = nr_pages;
231 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
Konrad Rzeszutek Wilk40095de2011-03-14 11:42:40 -0400232 if (rc <= 0)
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100233 return BP_EAGAIN;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700234
Ian Campbellbc2c0302009-06-05 11:58:37 +0100235 for (i = 0; i < rc; i++) {
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700236 page = balloon_retrieve();
237 BUG_ON(page == NULL);
238
239 pfn = page_to_pfn(page);
240 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
241 phys_to_machine_mapping_valid(pfn));
242
243 set_phys_to_machine(pfn, frame_list[i]);
244
245 /* Link back into the page tables if not highmem. */
246 if (pfn < max_low_pfn) {
247 int ret;
248 ret = HYPERVISOR_update_va_mapping(
249 (unsigned long)__va(pfn << PAGE_SHIFT),
250 mfn_pte(frame_list[i], PAGE_KERNEL),
251 0);
252 BUG_ON(ret);
253 }
254
255 /* Relinquish the page back to the allocator. */
256 ClearPageReserved(page);
257 init_page_count(page);
258 __free_page(page);
259 }
260
Ian Campbellbc2c0302009-06-05 11:58:37 +0100261 balloon_stats.current_pages += rc;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700262
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100263 return BP_DONE;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700264}
265
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100266static enum bp_state decrease_reservation(unsigned long nr_pages)
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700267{
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100268 enum bp_state state = BP_DONE;
Jeremy Fitzhardinge2f70e0a2010-09-02 23:11:17 -0700269 unsigned long pfn, i;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700270 struct page *page;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700271 int ret;
272 struct xen_memory_reservation reservation = {
273 .address_bits = 0,
274 .extent_order = 0,
275 .domid = DOMID_SELF
276 };
277
278 if (nr_pages > ARRAY_SIZE(frame_list))
279 nr_pages = ARRAY_SIZE(frame_list);
280
281 for (i = 0; i < nr_pages; i++) {
282 if ((page = alloc_page(GFP_BALLOON)) == NULL) {
283 nr_pages = i;
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100284 state = BP_EAGAIN;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700285 break;
286 }
287
288 pfn = page_to_pfn(page);
289 frame_list[i] = pfn_to_mfn(pfn);
290
291 scrub_page(page);
Dan Magenheimer1058a752009-01-22 14:36:08 -0800292
Ian Campbellff4ce8c2009-01-23 16:26:21 +0000293 if (!PageHighMem(page)) {
294 ret = HYPERVISOR_update_va_mapping(
295 (unsigned long)__va(pfn << PAGE_SHIFT),
296 __pte_ma(0), 0);
297 BUG_ON(ret);
298 }
299
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700300 }
301
302 /* Ensure that ballooned highmem pages don't have kmaps. */
303 kmap_flush_unused();
304 flush_tlb_all();
305
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700306 /* No more mappings: invalidate P2M and add to balloon. */
307 for (i = 0; i < nr_pages; i++) {
308 pfn = mfn_to_pfn(frame_list[i]);
309 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
310 balloon_append(pfn_to_page(pfn));
311 }
312
Isaku Yamahataa90971e2008-05-26 23:31:14 +0100313 set_xen_guest_handle(reservation.extent_start, frame_list);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700314 reservation.nr_extents = nr_pages;
315 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
316 BUG_ON(ret != nr_pages);
317
318 balloon_stats.current_pages -= nr_pages;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700319
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100320 return state;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700321}
322
323/*
324 * We avoid multiple worker processes conflicting via the balloon mutex.
325 * We may of course race updates of the target counts (which are protected
326 * by the balloon lock), or with changes to the Xen hard limit, but we will
327 * recover from these in time.
328 */
329static void balloon_process(struct work_struct *work)
330{
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100331 enum bp_state state = BP_DONE;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700332 long credit;
333
334 mutex_lock(&balloon_mutex);
335
336 do {
337 credit = current_target() - balloon_stats.current_pages;
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100338
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700339 if (credit > 0)
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100340 state = increase_reservation(credit);
341
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700342 if (credit < 0)
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100343 state = decrease_reservation(-credit);
344
345 state = update_schedule(state);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700346
347#ifndef CONFIG_PREEMPT
348 if (need_resched())
349 schedule();
350#endif
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100351 } while (credit && state == BP_DONE);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700352
353 /* Schedule more work if there is some still to be done. */
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100354 if (state == BP_EAGAIN)
355 schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700356
357 mutex_unlock(&balloon_mutex);
358}
359
360/* Resets the Xen limit, sets new target, and kicks off processing. */
Daniel De Graaf803eb042011-03-14 11:29:37 -0400361void balloon_set_new_target(unsigned long target)
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700362{
363 /* No need for lock. Not read-modify-write updates. */
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700364 balloon_stats.target_pages = target;
Daniel Kiper95170b22011-03-08 22:47:39 +0100365 schedule_delayed_work(&balloon_worker, 0);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700366}
Daniel De Graaf803eb042011-03-14 11:29:37 -0400367EXPORT_SYMBOL_GPL(balloon_set_new_target);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700368
369static int __init balloon_init(void)
370{
Jeremy Fitzhardinge66946f62010-09-14 10:32:32 -0700371 unsigned long pfn, extra_pfn_end;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700372 struct page *page;
373
Jeremy Fitzhardinge6e833582008-08-19 13:16:17 -0700374 if (!xen_pv_domain())
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700375 return -ENODEV;
376
Daniel De Graaf803eb042011-03-14 11:29:37 -0400377 pr_info("xen/balloon: Initialising balloon driver.\n");
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700378
379 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700380 balloon_stats.target_pages = balloon_stats.current_pages;
381 balloon_stats.balloon_low = 0;
382 balloon_stats.balloon_high = 0;
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700383
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100384 balloon_stats.schedule_delay = 1;
385 balloon_stats.max_schedule_delay = 32;
386 balloon_stats.retry_count = 1;
Konrad Rzeszutek Wilk40095de2011-03-14 11:42:40 -0400387 balloon_stats.max_retry_count = RETRY_UNLIMITED;
Daniel Kiper95d2ac42011-03-08 22:48:24 +0100388
Jeremy Fitzhardinge2a4c92f2010-12-02 15:30:06 -0800389 /*
390 * Initialise the balloon with excess memory space. We need
391 * to make sure we don't add memory which doesn't exist or
392 * logically exist. The E820 map can be trimmed to be smaller
393 * than the amount of physical memory due to the mem= command
394 * line parameter. And if this is a 32-bit non-HIGHMEM kernel
395 * on a system with memory which requires highmem to access,
396 * don't try to use it.
397 */
398 extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
Jeremy Fitzhardinge66946f62010-09-14 10:32:32 -0700399 (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700400 for (pfn = PFN_UP(xen_extra_mem_start);
Jeremy Fitzhardinge66946f62010-09-14 10:32:32 -0700401 pfn < extra_pfn_end;
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700402 pfn++) {
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700403 page = pfn_to_page(pfn);
Jeremy Fitzhardinge9be4d452010-08-31 15:01:16 -0700404 /* totalram_pages doesn't include the boot-time
405 balloon extension, so don't subtract from it. */
406 __balloon_append(page);
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700407 }
408
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700409 return 0;
410}
411
412subsys_initcall(balloon_init);
413
Jeremy Fitzhardinge17758262008-04-02 10:54:13 -0700414MODULE_LICENSE("GPL");