| Jeremy Fitzhardinge | 1775826 | 2008-04-02 10:54:13 -0700 | [diff] [blame] | 1 | /****************************************************************************** | 
 | 2 |  * balloon.c | 
 | 3 |  * | 
 | 4 |  * Xen balloon driver - enables returning/claiming memory to/from Xen. | 
 | 5 |  * | 
 | 6 |  * Copyright (c) 2003, B Dragovic | 
 | 7 |  * Copyright (c) 2003-2004, M Williamson, K Fraser | 
 | 8 |  * Copyright (c) 2005 Dan M. Smith, IBM Corporation | 
 | 9 |  * | 
 | 10 |  * This program is free software; you can redistribute it and/or | 
 | 11 |  * modify it under the terms of the GNU General Public License version 2 | 
 | 12 |  * as published by the Free Software Foundation; or, when distributed | 
 | 13 |  * separately from the Linux kernel or incorporated into other | 
 | 14 |  * software packages, subject to the following license: | 
 | 15 |  * | 
 | 16 |  * Permission is hereby granted, free of charge, to any person obtaining a copy | 
 | 17 |  * of this source file (the "Software"), to deal in the Software without | 
 | 18 |  * restriction, including without limitation the rights to use, copy, modify, | 
 | 19 |  * merge, publish, distribute, sublicense, and/or sell copies of the Software, | 
 | 20 |  * and to permit persons to whom the Software is furnished to do so, subject to | 
 | 21 |  * the following conditions: | 
 | 22 |  * | 
 | 23 |  * The above copyright notice and this permission notice shall be included in | 
 | 24 |  * all copies or substantial portions of the Software. | 
 | 25 |  * | 
 | 26 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
 | 27 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
 | 28 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
 | 29 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
 | 30 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
 | 31 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 
 | 32 |  * IN THE SOFTWARE. | 
 | 33 |  */ | 
 | 34 |  | 
 | 35 | #include <linux/kernel.h> | 
 | 36 | #include <linux/module.h> | 
 | 37 | #include <linux/sched.h> | 
 | 38 | #include <linux/errno.h> | 
 | 39 | #include <linux/mm.h> | 
 | 40 | #include <linux/bootmem.h> | 
 | 41 | #include <linux/pagemap.h> | 
 | 42 | #include <linux/highmem.h> | 
 | 43 | #include <linux/mutex.h> | 
 | 44 | #include <linux/highmem.h> | 
 | 45 | #include <linux/list.h> | 
 | 46 | #include <linux/sysdev.h> | 
 | 47 |  | 
 | 48 | #include <asm/xen/hypervisor.h> | 
 | 49 | #include <asm/page.h> | 
 | 50 | #include <asm/pgalloc.h> | 
 | 51 | #include <asm/pgtable.h> | 
 | 52 | #include <asm/uaccess.h> | 
 | 53 | #include <asm/tlb.h> | 
 | 54 |  | 
 | 55 | #include <xen/interface/memory.h> | 
 | 56 | #include <xen/balloon.h> | 
 | 57 | #include <xen/xenbus.h> | 
 | 58 | #include <xen/features.h> | 
 | 59 | #include <xen/page.h> | 
 | 60 |  | 
/* Convert a page count to KiB: shift left by (PAGE_SHIFT - 10). */
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))

/* Name given to the sysdev class that this driver registers. */
#define BALLOON_CLASS_NAME "memory"
 | 64 |  | 
/*
 * Bookkeeping for the balloon driver.  All counters are in units of pages.
 */
struct balloon_stats {
	/* We aim for 'current allocation' == 'target allocation'. */
	unsigned long current_pages;
	unsigned long target_pages;
	/*
	 * We may hit the hard limit in Xen. If we do then we remember it.
	 * ~0UL means that no limit is currently known.
	 */
	unsigned long hard_limit;
	/*
	 * Drivers may alter the memory reservation independently, but they
	 * must inform the balloon driver so we avoid hitting the hard limit.
	 */
	unsigned long driver_pages;
	/* Number of pages in high- and low-memory balloons. */
	unsigned long balloon_low;
	unsigned long balloon_high;
};
 | 80 |  | 
/* Held by balloon_process() for the whole of one worker pass. */
static DEFINE_MUTEX(balloon_mutex);

/* The sysdev instance that carries the balloon sysfs attributes. */
static struct sys_device balloon_sysdev;

/* Defined near the end of this file; needed earlier by balloon_init(). */
static int register_balloon(struct sys_device *sysdev);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
static DEFINE_SPINLOCK(balloon_lock);

/* The single, file-wide set of balloon counters. */
static struct balloon_stats balloon_stats;

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/*
 * Helpers that adjust totalhigh_pages by one.  They expand to nothing when
 * the kernel is built without CONFIG_HIGHMEM.
 */
#ifdef CONFIG_HIGHMEM
extern unsigned long totalhigh_pages;
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while(0)
#define dec_totalhigh_pages() do {} while(0)
#endif

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
static DECLARE_WORK(balloon_worker, balloon_process);
/* Re-arms the worker one second later when a pass leaves work undone. */
static struct timer_list balloon_timer;

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 | 123 |  | 
 | 124 | static void scrub_page(struct page *page) | 
 | 125 | { | 
 | 126 | #ifdef CONFIG_XEN_SCRUB_PAGES | 
 | 127 | 	if (PageHighMem(page)) { | 
 | 128 | 		void *v = kmap(page); | 
 | 129 | 		clear_page(v); | 
 | 130 | 		kunmap(v); | 
 | 131 | 	} else { | 
 | 132 | 		void *v = page_address(page); | 
 | 133 | 		clear_page(v); | 
 | 134 | 	} | 
 | 135 | #endif | 
 | 136 | } | 
 | 137 |  | 
 | 138 | /* balloon_append: add the given page to the balloon. */ | 
 | 139 | static void balloon_append(struct page *page) | 
 | 140 | { | 
 | 141 | 	/* Lowmem is re-populated first, so highmem pages go at list tail. */ | 
 | 142 | 	if (PageHighMem(page)) { | 
 | 143 | 		list_add_tail(&page->lru, &ballooned_pages); | 
 | 144 | 		balloon_stats.balloon_high++; | 
 | 145 | 		dec_totalhigh_pages(); | 
 | 146 | 	} else { | 
 | 147 | 		list_add(&page->lru, &ballooned_pages); | 
 | 148 | 		balloon_stats.balloon_low++; | 
 | 149 | 	} | 
 | 150 | } | 
 | 151 |  | 
 | 152 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | 
 | 153 | static struct page *balloon_retrieve(void) | 
 | 154 | { | 
 | 155 | 	struct page *page; | 
 | 156 |  | 
 | 157 | 	if (list_empty(&ballooned_pages)) | 
 | 158 | 		return NULL; | 
 | 159 |  | 
 | 160 | 	page = list_entry(ballooned_pages.next, struct page, lru); | 
 | 161 | 	list_del(&page->lru); | 
 | 162 |  | 
 | 163 | 	if (PageHighMem(page)) { | 
 | 164 | 		balloon_stats.balloon_high--; | 
 | 165 | 		inc_totalhigh_pages(); | 
 | 166 | 	} | 
 | 167 | 	else | 
 | 168 | 		balloon_stats.balloon_low--; | 
 | 169 |  | 
 | 170 | 	return page; | 
 | 171 | } | 
 | 172 |  | 
 | 173 | static struct page *balloon_first_page(void) | 
 | 174 | { | 
 | 175 | 	if (list_empty(&ballooned_pages)) | 
 | 176 | 		return NULL; | 
 | 177 | 	return list_entry(ballooned_pages.next, struct page, lru); | 
 | 178 | } | 
 | 179 |  | 
 | 180 | static struct page *balloon_next_page(struct page *page) | 
 | 181 | { | 
 | 182 | 	struct list_head *next = page->lru.next; | 
 | 183 | 	if (next == &ballooned_pages) | 
 | 184 | 		return NULL; | 
 | 185 | 	return list_entry(next, struct page, lru); | 
 | 186 | } | 
 | 187 |  | 
 | 188 | static void balloon_alarm(unsigned long unused) | 
 | 189 | { | 
 | 190 | 	schedule_work(&balloon_worker); | 
 | 191 | } | 
 | 192 |  | 
 | 193 | static unsigned long current_target(void) | 
 | 194 | { | 
 | 195 | 	unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); | 
 | 196 |  | 
 | 197 | 	target = min(target, | 
 | 198 | 		     balloon_stats.current_pages + | 
 | 199 | 		     balloon_stats.balloon_low + | 
 | 200 | 		     balloon_stats.balloon_high); | 
 | 201 |  | 
 | 202 | 	return target; | 
 | 203 | } | 
 | 204 |  | 
 | 205 | static int increase_reservation(unsigned long nr_pages) | 
 | 206 | { | 
 | 207 | 	unsigned long  pfn, i, flags; | 
 | 208 | 	struct page   *page; | 
 | 209 | 	long           rc; | 
 | 210 | 	struct xen_memory_reservation reservation = { | 
 | 211 | 		.address_bits = 0, | 
 | 212 | 		.extent_order = 0, | 
 | 213 | 		.domid        = DOMID_SELF | 
 | 214 | 	}; | 
 | 215 |  | 
 | 216 | 	if (nr_pages > ARRAY_SIZE(frame_list)) | 
 | 217 | 		nr_pages = ARRAY_SIZE(frame_list); | 
 | 218 |  | 
 | 219 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 220 |  | 
 | 221 | 	page = balloon_first_page(); | 
 | 222 | 	for (i = 0; i < nr_pages; i++) { | 
 | 223 | 		BUG_ON(page == NULL); | 
 | 224 | 		frame_list[i] = page_to_pfn(page);; | 
 | 225 | 		page = balloon_next_page(page); | 
 | 226 | 	} | 
 | 227 |  | 
 | 228 | 	reservation.extent_start = (unsigned long)frame_list; | 
 | 229 | 	reservation.nr_extents   = nr_pages; | 
 | 230 | 	rc = HYPERVISOR_memory_op( | 
 | 231 | 		XENMEM_populate_physmap, &reservation); | 
 | 232 | 	if (rc < nr_pages) { | 
 | 233 | 		if (rc > 0) { | 
 | 234 | 			int ret; | 
 | 235 |  | 
 | 236 | 			/* We hit the Xen hard limit: reprobe. */ | 
 | 237 | 			reservation.nr_extents = rc; | 
 | 238 | 			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 
 | 239 | 					&reservation); | 
 | 240 | 			BUG_ON(ret != rc); | 
 | 241 | 		} | 
 | 242 | 		if (rc >= 0) | 
 | 243 | 			balloon_stats.hard_limit = (balloon_stats.current_pages + rc - | 
 | 244 | 						    balloon_stats.driver_pages); | 
 | 245 | 		goto out; | 
 | 246 | 	} | 
 | 247 |  | 
 | 248 | 	for (i = 0; i < nr_pages; i++) { | 
 | 249 | 		page = balloon_retrieve(); | 
 | 250 | 		BUG_ON(page == NULL); | 
 | 251 |  | 
 | 252 | 		pfn = page_to_pfn(page); | 
 | 253 | 		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && | 
 | 254 | 		       phys_to_machine_mapping_valid(pfn)); | 
 | 255 |  | 
 | 256 | 		set_phys_to_machine(pfn, frame_list[i]); | 
 | 257 |  | 
 | 258 | 		/* Link back into the page tables if not highmem. */ | 
 | 259 | 		if (pfn < max_low_pfn) { | 
 | 260 | 			int ret; | 
 | 261 | 			ret = HYPERVISOR_update_va_mapping( | 
 | 262 | 				(unsigned long)__va(pfn << PAGE_SHIFT), | 
 | 263 | 				mfn_pte(frame_list[i], PAGE_KERNEL), | 
 | 264 | 				0); | 
 | 265 | 			BUG_ON(ret); | 
 | 266 | 		} | 
 | 267 |  | 
 | 268 | 		/* Relinquish the page back to the allocator. */ | 
 | 269 | 		ClearPageReserved(page); | 
 | 270 | 		init_page_count(page); | 
 | 271 | 		__free_page(page); | 
 | 272 | 	} | 
 | 273 |  | 
 | 274 | 	balloon_stats.current_pages += nr_pages; | 
 | 275 | 	totalram_pages = balloon_stats.current_pages; | 
 | 276 |  | 
 | 277 |  out: | 
 | 278 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 279 |  | 
 | 280 | 	return 0; | 
 | 281 | } | 
 | 282 |  | 
 | 283 | static int decrease_reservation(unsigned long nr_pages) | 
 | 284 | { | 
 | 285 | 	unsigned long  pfn, i, flags; | 
 | 286 | 	struct page   *page; | 
 | 287 | 	int            need_sleep = 0; | 
 | 288 | 	int ret; | 
 | 289 | 	struct xen_memory_reservation reservation = { | 
 | 290 | 		.address_bits = 0, | 
 | 291 | 		.extent_order = 0, | 
 | 292 | 		.domid        = DOMID_SELF | 
 | 293 | 	}; | 
 | 294 |  | 
 | 295 | 	if (nr_pages > ARRAY_SIZE(frame_list)) | 
 | 296 | 		nr_pages = ARRAY_SIZE(frame_list); | 
 | 297 |  | 
 | 298 | 	for (i = 0; i < nr_pages; i++) { | 
 | 299 | 		if ((page = alloc_page(GFP_BALLOON)) == NULL) { | 
 | 300 | 			nr_pages = i; | 
 | 301 | 			need_sleep = 1; | 
 | 302 | 			break; | 
 | 303 | 		} | 
 | 304 |  | 
 | 305 | 		pfn = page_to_pfn(page); | 
 | 306 | 		frame_list[i] = pfn_to_mfn(pfn); | 
 | 307 |  | 
 | 308 | 		scrub_page(page); | 
 | 309 | 	} | 
 | 310 |  | 
 | 311 | 	/* Ensure that ballooned highmem pages don't have kmaps. */ | 
 | 312 | 	kmap_flush_unused(); | 
 | 313 | 	flush_tlb_all(); | 
 | 314 |  | 
 | 315 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 316 |  | 
 | 317 | 	/* No more mappings: invalidate P2M and add to balloon. */ | 
 | 318 | 	for (i = 0; i < nr_pages; i++) { | 
 | 319 | 		pfn = mfn_to_pfn(frame_list[i]); | 
 | 320 | 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 
 | 321 | 		balloon_append(pfn_to_page(pfn)); | 
 | 322 | 	} | 
 | 323 |  | 
 | 324 | 	reservation.extent_start = (unsigned long)frame_list; | 
 | 325 | 	reservation.nr_extents   = nr_pages; | 
 | 326 | 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | 
 | 327 | 	BUG_ON(ret != nr_pages); | 
 | 328 |  | 
 | 329 | 	balloon_stats.current_pages -= nr_pages; | 
 | 330 | 	totalram_pages = balloon_stats.current_pages; | 
 | 331 |  | 
 | 332 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 333 |  | 
 | 334 | 	return need_sleep; | 
 | 335 | } | 
 | 336 |  | 
 | 337 | /* | 
 | 338 |  * We avoid multiple worker processes conflicting via the balloon mutex. | 
 | 339 |  * We may of course race updates of the target counts (which are protected | 
 | 340 |  * by the balloon lock), or with changes to the Xen hard limit, but we will | 
 | 341 |  * recover from these in time. | 
 | 342 |  */ | 
 | 343 | static void balloon_process(struct work_struct *work) | 
 | 344 | { | 
 | 345 | 	int need_sleep = 0; | 
 | 346 | 	long credit; | 
 | 347 |  | 
 | 348 | 	mutex_lock(&balloon_mutex); | 
 | 349 |  | 
 | 350 | 	do { | 
 | 351 | 		credit = current_target() - balloon_stats.current_pages; | 
 | 352 | 		if (credit > 0) | 
 | 353 | 			need_sleep = (increase_reservation(credit) != 0); | 
 | 354 | 		if (credit < 0) | 
 | 355 | 			need_sleep = (decrease_reservation(-credit) != 0); | 
 | 356 |  | 
 | 357 | #ifndef CONFIG_PREEMPT | 
 | 358 | 		if (need_resched()) | 
 | 359 | 			schedule(); | 
 | 360 | #endif | 
 | 361 | 	} while ((credit != 0) && !need_sleep); | 
 | 362 |  | 
 | 363 | 	/* Schedule more work if there is some still to be done. */ | 
 | 364 | 	if (current_target() != balloon_stats.current_pages) | 
 | 365 | 		mod_timer(&balloon_timer, jiffies + HZ); | 
 | 366 |  | 
 | 367 | 	mutex_unlock(&balloon_mutex); | 
 | 368 | } | 
 | 369 |  | 
 | 370 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | 
 | 371 | void balloon_set_new_target(unsigned long target) | 
 | 372 | { | 
 | 373 | 	/* No need for lock. Not read-modify-write updates. */ | 
 | 374 | 	balloon_stats.hard_limit   = ~0UL; | 
 | 375 | 	balloon_stats.target_pages = target; | 
 | 376 | 	schedule_work(&balloon_worker); | 
 | 377 | } | 
 | 378 |  | 
/*
 * Xenbus watch on the "memory/target" node.  The callback is filled in by
 * balloon_init() and the watch is registered by balloon_init_watcher().
 */
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};
 | 383 |  | 
 | 384 | /* React to a change in the target key */ | 
 | 385 | static void watch_target(struct xenbus_watch *watch, | 
 | 386 | 			 const char **vec, unsigned int len) | 
 | 387 | { | 
 | 388 | 	unsigned long long new_target; | 
 | 389 | 	int err; | 
 | 390 |  | 
 | 391 | 	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | 
 | 392 | 	if (err != 1) { | 
 | 393 | 		/* This is ok (for domain0 at least) - so just return */ | 
 | 394 | 		return; | 
 | 395 | 	} | 
 | 396 |  | 
 | 397 | 	/* The given memory/target value is in KiB, so it needs converting to | 
 | 398 | 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | 
 | 399 | 	 */ | 
 | 400 | 	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | 
 | 401 | } | 
 | 402 |  | 
 | 403 | static int balloon_init_watcher(struct notifier_block *notifier, | 
 | 404 | 				unsigned long event, | 
 | 405 | 				void *data) | 
 | 406 | { | 
 | 407 | 	int err; | 
 | 408 |  | 
 | 409 | 	err = register_xenbus_watch(&target_watch); | 
 | 410 | 	if (err) | 
 | 411 | 		printk(KERN_ERR "Failed to set balloon watcher\n"); | 
 | 412 |  | 
 | 413 | 	return NOTIFY_DONE; | 
 | 414 | } | 
 | 415 |  | 
 | 416 | static struct notifier_block xenstore_notifier; | 
 | 417 |  | 
 | 418 | static int __init balloon_init(void) | 
 | 419 | { | 
 | 420 | 	unsigned long pfn; | 
 | 421 | 	struct page *page; | 
 | 422 |  | 
 | 423 | 	if (!is_running_on_xen()) | 
 | 424 | 		return -ENODEV; | 
 | 425 |  | 
 | 426 | 	pr_info("xen_balloon: Initialising balloon driver.\n"); | 
 | 427 |  | 
 | 428 | 	balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); | 
 | 429 | 	totalram_pages   = balloon_stats.current_pages; | 
 | 430 | 	balloon_stats.target_pages  = balloon_stats.current_pages; | 
 | 431 | 	balloon_stats.balloon_low   = 0; | 
 | 432 | 	balloon_stats.balloon_high  = 0; | 
 | 433 | 	balloon_stats.driver_pages  = 0UL; | 
 | 434 | 	balloon_stats.hard_limit    = ~0UL; | 
 | 435 |  | 
 | 436 | 	init_timer(&balloon_timer); | 
 | 437 | 	balloon_timer.data = 0; | 
 | 438 | 	balloon_timer.function = balloon_alarm; | 
 | 439 |  | 
 | 440 | 	register_balloon(&balloon_sysdev); | 
 | 441 |  | 
 | 442 | 	/* Initialise the balloon with excess memory space. */ | 
 | 443 | 	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | 
 | 444 | 		page = pfn_to_page(pfn); | 
 | 445 | 		if (!PageReserved(page)) | 
 | 446 | 			balloon_append(page); | 
 | 447 | 	} | 
 | 448 |  | 
 | 449 | 	target_watch.callback = watch_target; | 
 | 450 | 	xenstore_notifier.notifier_call = balloon_init_watcher; | 
 | 451 |  | 
 | 452 | 	register_xenstore_notifier(&xenstore_notifier); | 
 | 453 |  | 
 | 454 | 	return 0; | 
 | 455 | } | 
 | 456 |  | 
 | 457 | subsys_initcall(balloon_init); | 
 | 458 |  | 
 | 459 | static void balloon_exit(void) | 
 | 460 | { | 
 | 461 |     /* XXX - release balloon here */ | 
 | 462 |     return; | 
 | 463 | } | 
 | 464 |  | 
 | 465 | module_exit(balloon_exit); | 
 | 466 |  | 
 | 467 | static void balloon_update_driver_allowance(long delta) | 
 | 468 | { | 
 | 469 | 	unsigned long flags; | 
 | 470 |  | 
 | 471 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 472 | 	balloon_stats.driver_pages += delta; | 
 | 473 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 474 | } | 
 | 475 |  | 
 | 476 | static int dealloc_pte_fn( | 
 | 477 | 	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | 
 | 478 | { | 
 | 479 | 	unsigned long mfn = pte_mfn(*pte); | 
 | 480 | 	int ret; | 
 | 481 | 	struct xen_memory_reservation reservation = { | 
 | 482 | 		.nr_extents   = 1, | 
 | 483 | 		.extent_order = 0, | 
 | 484 | 		.domid        = DOMID_SELF | 
 | 485 | 	}; | 
 | 486 | 	reservation.extent_start = (unsigned long)&mfn; | 
 | 487 | 	set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); | 
 | 488 | 	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | 
 | 489 | 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | 
 | 490 | 	BUG_ON(ret != 1); | 
 | 491 | 	return 0; | 
 | 492 | } | 
 | 493 |  | 
 | 494 | static struct page **alloc_empty_pages_and_pagevec(int nr_pages) | 
 | 495 | { | 
 | 496 | 	unsigned long vaddr, flags; | 
 | 497 | 	struct page *page, **pagevec; | 
 | 498 | 	int i, ret; | 
 | 499 |  | 
 | 500 | 	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | 
 | 501 | 	if (pagevec == NULL) | 
 | 502 | 		return NULL; | 
 | 503 |  | 
 | 504 | 	for (i = 0; i < nr_pages; i++) { | 
 | 505 | 		page = pagevec[i] = alloc_page(GFP_KERNEL); | 
 | 506 | 		if (page == NULL) | 
 | 507 | 			goto err; | 
 | 508 |  | 
 | 509 | 		vaddr = (unsigned long)page_address(page); | 
 | 510 |  | 
 | 511 | 		scrub_page(page); | 
 | 512 |  | 
 | 513 | 		spin_lock_irqsave(&balloon_lock, flags); | 
 | 514 |  | 
 | 515 | 		if (xen_feature(XENFEAT_auto_translated_physmap)) { | 
 | 516 | 			unsigned long gmfn = page_to_pfn(page); | 
 | 517 | 			struct xen_memory_reservation reservation = { | 
 | 518 | 				.nr_extents   = 1, | 
 | 519 | 				.extent_order = 0, | 
 | 520 | 				.domid        = DOMID_SELF | 
 | 521 | 			}; | 
 | 522 | 			reservation.extent_start = (unsigned long)&gmfn; | 
 | 523 | 			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 
 | 524 | 						   &reservation); | 
 | 525 | 			if (ret == 1) | 
 | 526 | 				ret = 0; /* success */ | 
 | 527 | 		} else { | 
 | 528 | 			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, | 
 | 529 | 						  dealloc_pte_fn, NULL); | 
 | 530 | 		} | 
 | 531 |  | 
 | 532 | 		if (ret != 0) { | 
 | 533 | 			spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 534 | 			__free_page(page); | 
 | 535 | 			goto err; | 
 | 536 | 		} | 
 | 537 |  | 
 | 538 | 		totalram_pages = --balloon_stats.current_pages; | 
 | 539 |  | 
 | 540 | 		spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 541 | 	} | 
 | 542 |  | 
 | 543 |  out: | 
 | 544 | 	schedule_work(&balloon_worker); | 
 | 545 | 	flush_tlb_all(); | 
 | 546 | 	return pagevec; | 
 | 547 |  | 
 | 548 |  err: | 
 | 549 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 550 | 	while (--i >= 0) | 
 | 551 | 		balloon_append(pagevec[i]); | 
 | 552 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 553 | 	kfree(pagevec); | 
 | 554 | 	pagevec = NULL; | 
 | 555 | 	goto out; | 
 | 556 | } | 
 | 557 |  | 
 | 558 | static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | 
 | 559 | { | 
 | 560 | 	unsigned long flags; | 
 | 561 | 	int i; | 
 | 562 |  | 
 | 563 | 	if (pagevec == NULL) | 
 | 564 | 		return; | 
 | 565 |  | 
 | 566 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 567 | 	for (i = 0; i < nr_pages; i++) { | 
 | 568 | 		BUG_ON(page_count(pagevec[i]) != 1); | 
 | 569 | 		balloon_append(pagevec[i]); | 
 | 570 | 	} | 
 | 571 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 572 |  | 
 | 573 | 	kfree(pagevec); | 
 | 574 |  | 
 | 575 | 	schedule_work(&balloon_worker); | 
 | 576 | } | 
 | 577 |  | 
 | 578 | static void balloon_release_driver_page(struct page *page) | 
 | 579 | { | 
 | 580 | 	unsigned long flags; | 
 | 581 |  | 
 | 582 | 	spin_lock_irqsave(&balloon_lock, flags); | 
 | 583 | 	balloon_append(page); | 
 | 584 | 	balloon_stats.driver_pages--; | 
 | 585 | 	spin_unlock_irqrestore(&balloon_lock, flags); | 
 | 586 |  | 
 | 587 | 	schedule_work(&balloon_worker); | 
 | 588 | } | 
 | 589 |  | 
 | 590 |  | 
/*
 * BALLOON_SHOW(name, format, args...): define a read-only (S_IRUGO) sysdev
 * attribute called @name together with its show_<name>() handler, which
 * sprintf()s @args into the output buffer using @format.
 */
#define BALLOON_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct sys_device *dev,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)

/* Read-only statistics, all reported in KiB. */
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
/* Prints "???" while no hard limit is known (hard_limit == ~0UL). */
BALLOON_SHOW(hard_limit_kb,
	     (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
	     (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 | 606 |  | 
 | 607 | static ssize_t show_target_kb(struct sys_device *dev, char *buf) | 
 | 608 | { | 
 | 609 | 	return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | 
 | 610 | } | 
 | 611 |  | 
 | 612 | static ssize_t store_target_kb(struct sys_device *dev, | 
 | 613 | 			       const char *buf, | 
 | 614 | 			       size_t count) | 
 | 615 | { | 
 | 616 | 	char memstring[64], *endchar; | 
 | 617 | 	unsigned long long target_bytes; | 
 | 618 |  | 
 | 619 | 	if (!capable(CAP_SYS_ADMIN)) | 
 | 620 | 		return -EPERM; | 
 | 621 |  | 
 | 622 | 	if (count <= 1) | 
 | 623 | 		return -EBADMSG; /* runt */ | 
 | 624 | 	if (count > sizeof(memstring)) | 
 | 625 | 		return -EFBIG;   /* too long */ | 
 | 626 | 	strcpy(memstring, buf); | 
 | 627 |  | 
 | 628 | 	target_bytes = memparse(memstring, &endchar); | 
 | 629 | 	balloon_set_new_target(target_bytes >> PAGE_SHIFT); | 
 | 630 |  | 
 | 631 | 	return count; | 
 | 632 | } | 
 | 633 |  | 
/* Target attribute: world-readable, writable by the owner. */
static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
		   show_target_kb, store_target_kb);

/* Attributes created directly on the sysdev by register_balloon(). */
static struct sysdev_attribute *balloon_attrs[] = {
	&attr_target_kb,
};

/* Read-only statistics, exposed together as the "info" group below. */
static struct attribute *balloon_info_attrs[] = {
	&attr_current_kb.attr,
	&attr_low_kb.attr,
	&attr_high_kb.attr,
	&attr_hard_limit_kb.attr,
	&attr_driver_kb.attr,
	NULL
};

static struct attribute_group balloon_info_group = {
	.name = "info",
	.attrs = balloon_info_attrs,
};

/* The sysdev class that the balloon device belongs to. */
static struct sysdev_class balloon_sysdev_class = {
	.name = BALLOON_CLASS_NAME,
};
 | 658 |  | 
 | 659 | static int register_balloon(struct sys_device *sysdev) | 
 | 660 | { | 
 | 661 | 	int i, error; | 
 | 662 |  | 
 | 663 | 	error = sysdev_class_register(&balloon_sysdev_class); | 
 | 664 | 	if (error) | 
 | 665 | 		return error; | 
 | 666 |  | 
 | 667 | 	sysdev->id = 0; | 
 | 668 | 	sysdev->cls = &balloon_sysdev_class; | 
 | 669 |  | 
 | 670 | 	error = sysdev_register(sysdev); | 
 | 671 | 	if (error) { | 
 | 672 | 		sysdev_class_unregister(&balloon_sysdev_class); | 
 | 673 | 		return error; | 
 | 674 | 	} | 
 | 675 |  | 
 | 676 | 	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | 
 | 677 | 		error = sysdev_create_file(sysdev, balloon_attrs[i]); | 
 | 678 | 		if (error) | 
 | 679 | 			goto fail; | 
 | 680 | 	} | 
 | 681 |  | 
 | 682 | 	error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | 
 | 683 | 	if (error) | 
 | 684 | 		goto fail; | 
 | 685 |  | 
 | 686 | 	return 0; | 
 | 687 |  | 
 | 688 |  fail: | 
 | 689 | 	while (--i >= 0) | 
 | 690 | 		sysdev_remove_file(sysdev, balloon_attrs[i]); | 
 | 691 | 	sysdev_unregister(sysdev); | 
 | 692 | 	sysdev_class_unregister(&balloon_sysdev_class); | 
 | 693 | 	return error; | 
 | 694 | } | 
 | 695 |  | 
 | 696 | static void unregister_balloon(struct sys_device *sysdev) | 
 | 697 | { | 
 | 698 | 	int i; | 
 | 699 |  | 
 | 700 | 	sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | 
 | 701 | 	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | 
 | 702 | 		sysdev_remove_file(sysdev, balloon_attrs[i]); | 
 | 703 | 	sysdev_unregister(sysdev); | 
 | 704 | 	sysdev_class_unregister(&balloon_sysdev_class); | 
 | 705 | } | 
 | 706 |  | 
 | 707 | static void balloon_sysfs_exit(void) | 
 | 708 | { | 
 | 709 | 	unregister_balloon(&balloon_sysdev); | 
 | 710 | } | 
 | 711 |  | 
 | 712 | MODULE_LICENSE("GPL"); |