Blame - kernel/kexec.c - kernel/msm-4.9

blob: cb85c79989b440a3979e3d2e6dd3bcfa198b1c3b [file] [log] [blame]

Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1	/*
				2	* kexec.c - kexec system call
				3	* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
				4	*
				5	* This source code is licensed under the GNU General Public License,
				6	* Version 2. See the file COPYING for more details.
				7	*/
				8
Randy.Dunlap	c59ede7	2006-01-11 12:17:46 -0800	[diff] [blame]	9	#include <linux/capability.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	10	#include <linux/mm.h>
				11	#include <linux/file.h>
				12	#include <linux/slab.h>
				13	#include <linux/fs.h>
				14	#include <linux/kexec.h>
				15	#include <linux/spinlock.h>
				16	#include <linux/list.h>
				17	#include <linux/highmem.h>
				18	#include <linux/syscalls.h>
				19	#include <linux/reboot.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	20	#include <linux/ioport.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	21	#include <linux/hardirq.h>
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	22	#include <linux/elf.h>
				23	#include <linux/elfcore.h>
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	24	#include <linux/utsrelease.h>
				25	#include <linux/utsname.h>
				26	#include <linux/numa.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	27
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	28	#include <asm/page.h>
				29	#include <asm/uaccess.h>
				30	#include <asm/io.h>
				31	#include <asm/system.h>
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	32	#include <asm/sections.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	33
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	34	/* Per cpu memory for storing cpu states in case of system crash. */
				35	note_buf_t* crash_notes;
				36
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	37	/* vmcoreinfo stuff */
				38	unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
				39	u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
Ken'ichi Ohmichi	d768281	2007-10-16 23:27:28 -0700	[diff] [blame]	40	size_t vmcoreinfo_size;
				41	size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	42
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	43	/* Location of the reserved area for the crash kernel */
				44	struct resource crashk_res = {
				45	.name = "Crash kernel",
				46	.start = 0,
				47	.end = 0,
				48	.flags = IORESOURCE_BUSY \| IORESOURCE_MEM
				49	};
				50
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	51	int kexec_should_crash(struct task_struct *p)
				52	{
Serge E. Hallyn	b460cbc	2007-10-18 23:39:52 -0700	[diff] [blame]	53	if (in_interrupt() \|\| !p->pid \|\| is_global_init(p) \|\| panic_on_oops)
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	54	return 1;
				55	return 0;
				56	}
				57
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	58	/*
				59	* When kexec transitions to the new kernel there is a one-to-one
				60	* mapping between physical and virtual addresses. On processors
				61	* where you can disable the MMU this is trivial, and easy. For
				62	* others it is still a simple predictable page table to setup.
				63	*
				64	* In that environment kexec copies the new kernel to its final
				65	* resting place. This means I can only support memory whose
				66	* physical address can fit in an unsigned long. In particular
				67	* addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
				68	* If the assembly stub has more restrictive requirements
				69	* KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DESTINATION_MEMORY_LIMIT can be
				70	* defined more restrictively in <asm/kexec.h>.
				71	*
				72	* The code for the transition from the current kernel to the
				73	* new kernel is placed in the control_code_buffer, whose size
				74	* is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
				75	* page of memory is necessary, but some architectures require more.
				76	* Because this memory must be identity mapped in the transition from
				77	* virtual to physical addresses it must live in the range
				78	* 0 - TASK_SIZE, as only the user space mappings are arbitrarily
				79	* modifiable.
				80	*
				81	* The assembly stub in the control code buffer is passed a linked list
				82	* of descriptor pages detailing the source pages of the new kernel,
				83	* and the destination addresses of those source pages. As this data
				84	* structure is not used in the context of the current OS, it must
				85	* be self-contained.
				86	*
				87	* The code has been made to work with highmem pages and will use a
				88	* destination page in its final resting place (if it happens
				89	* to allocate it). The end product of this is that most of the
				90	* physical address space, and most of RAM can be used.
				91	*
				92	* Future directions include:
				93	* - allocating a page table with the control code buffer identity
				94	* mapped, to simplify machine_kexec and make kexec_on_panic more
				95	* reliable.
				96	*/
				97
				98	/*
				99	* KIMAGE_NO_DEST is an impossible destination address, used when
				100	* allocating pages whose destination address we do not care about.
				101	*/
				102	#define KIMAGE_NO_DEST (-1UL)
				103
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	104	static int kimage_is_destination_range(struct kimage *image,
				105	unsigned long start, unsigned long end);
				106	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	107	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	108	unsigned long dest);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	109
				110	static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	111	unsigned long nr_segments,
				112	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	113	{
				114	size_t segment_bytes;
				115	struct kimage *image;
				116	unsigned long i;
				117	int result;
				118
				119	/* Allocate a controlling structure */
				120	result = -ENOMEM;
Burman Yan	4668edc	2006-12-06 20:38:51 -0800	[diff] [blame]	121	image = kzalloc(sizeof(*image), GFP_KERNEL);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	122	if (!image)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	123	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	124
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	125	image->head = 0;
				126	image->entry = &image->head;
				127	image->last_entry = &image->head;
				128	image->control_page = ~0; /* By default this does not apply */
				129	image->start = entry;
				130	image->type = KEXEC_TYPE_DEFAULT;
				131
				132	/* Initialize the list of control pages */
				133	INIT_LIST_HEAD(&image->control_pages);
				134
				135	/* Initialize the list of destination pages */
				136	INIT_LIST_HEAD(&image->dest_pages);
				137
				138	/* Initialize the list of unuseable pages */
				139	INIT_LIST_HEAD(&image->unuseable_pages);
				140
				141	/* Read in the segments */
				142	image->nr_segments = nr_segments;
				143	segment_bytes = nr_segments * sizeof(*segments);
				144	result = copy_from_user(image->segment, segments, segment_bytes);
				145	if (result)
				146	goto out;
				147
				148	/*
				149	* Verify we have good destination addresses. The caller is
				150	* responsible for making certain we don't attempt to load
				151	* the new image into invalid or reserved areas of RAM. This
				152	* just verifies it is an address we can use.
				153	*
				154	* Since the kernel does everything in page size chunks ensure
				155	* the destination addreses are page aligned. Too many
				156	* special cases crop of when we don't do this. The most
				157	* insidious is getting overlapping destination addresses
				158	* simply because addresses are changed to page size
				159	* granularity.
				160	*/
				161	result = -EADDRNOTAVAIL;
				162	for (i = 0; i < nr_segments; i++) {
				163	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	164
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	165	mstart = image->segment[i].mem;
				166	mend = mstart + image->segment[i].memsz;
				167	if ((mstart & ~PAGE_MASK) \|\| (mend & ~PAGE_MASK))
				168	goto out;
				169	if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
				170	goto out;
				171	}
				172
				173	/* Verify our destination addresses do not overlap.
				174	* If we alloed overlapping destination addresses
				175	* through very weird things can happen with no
				176	* easy explanation as one segment stops on another.
				177	*/
				178	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	179	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	180	unsigned long mstart, mend;
				181	unsigned long j;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	182
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	183	mstart = image->segment[i].mem;
				184	mend = mstart + image->segment[i].memsz;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	185	for (j = 0; j < i; j++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	186	unsigned long pstart, pend;
				187	pstart = image->segment[j].mem;
				188	pend = pstart + image->segment[j].memsz;
				189	/* Do the segments overlap ? */
				190	if ((mend > pstart) && (mstart < pend))
				191	goto out;
				192	}
				193	}
				194
				195	/* Ensure our buffer sizes are strictly less than
				196	* our memory sizes. This should always be the case,
				197	* and it is easier to check up front than to be surprised
				198	* later on.
				199	*/
				200	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	201	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	202	if (image->segment[i].bufsz > image->segment[i].memsz)
				203	goto out;
				204	}
				205
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	206	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	207	out:
				208	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	209	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	210	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	211	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	212
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	213	return result;
				214
				215	}
				216
				217	static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	218	unsigned long nr_segments,
				219	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	220	{
				221	int result;
				222	struct kimage *image;
				223
				224	/* Allocate and initialize a controlling structure */
				225	image = NULL;
				226	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	227	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	228	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	229
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	230	*rimage = image;
				231
				232	/*
				233	* Find a location for the control code buffer, and add it
				234	* the vector of segments so that it's pages will also be
				235	* counted as destination pages.
				236	*/
				237	result = -ENOMEM;
				238	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	239	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	240	if (!image->control_code_page) {
				241	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				242	goto out;
				243	}
				244
				245	result = 0;
				246	out:
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	247	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	248	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	249	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	250	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	251
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	252	return result;
				253	}
				254
				255	static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	256	unsigned long nr_segments,
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	257	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	258	{
				259	int result;
				260	struct kimage *image;
				261	unsigned long i;
				262
				263	image = NULL;
				264	/* Verify we have a valid entry point */
				265	if ((entry < crashk_res.start) \|\| (entry > crashk_res.end)) {
				266	result = -EADDRNOTAVAIL;
				267	goto out;
				268	}
				269
				270	/* Allocate and initialize a controlling structure */
				271	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	272	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	273	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	274
				275	/* Enable the special crash kernel control page
				276	* allocation policy.
				277	*/
				278	image->control_page = crashk_res.start;
				279	image->type = KEXEC_TYPE_CRASH;
				280
				281	/*
				282	* Verify we have good destination addresses. Normally
				283	* the caller is responsible for making certain we don't
				284	* attempt to load the new image into invalid or reserved
				285	* areas of RAM. But crash kernels are preloaded into a
				286	* reserved area of ram. We must ensure the addresses
				287	* are in the reserved area otherwise preloading the
				288	* kernel could corrupt things.
				289	*/
				290	result = -EADDRNOTAVAIL;
				291	for (i = 0; i < nr_segments; i++) {
				292	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	293
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	294	mstart = image->segment[i].mem;
Vivek Goyal	50cccc6	2005-06-25 14:57:55 -0700	[diff] [blame]	295	mend = mstart + image->segment[i].memsz - 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	296	/* Ensure we are within the crash kernel limits */
				297	if ((mstart < crashk_res.start) \|\| (mend > crashk_res.end))
				298	goto out;
				299	}
				300
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	301	/*
				302	* Find a location for the control code buffer, and add
				303	* the vector of segments so that it's pages will also be
				304	* counted as destination pages.
				305	*/
				306	result = -ENOMEM;
				307	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	308	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	309	if (!image->control_code_page) {
				310	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				311	goto out;
				312	}
				313
				314	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	315	out:
				316	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	317	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	318	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	319	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	320
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	321	return result;
				322	}
				323
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	324	static int kimage_is_destination_range(struct kimage *image,
				325	unsigned long start,
				326	unsigned long end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	327	{
				328	unsigned long i;
				329
				330	for (i = 0; i < image->nr_segments; i++) {
				331	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	332
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	333	mstart = image->segment[i].mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	334	mend = mstart + image->segment[i].memsz;
				335	if ((end > mstart) && (start < mend))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	336	return 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	337	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	338
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	339	return 0;
				340	}
				341
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	342	static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	343	{
				344	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	345
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	346	pages = alloc_pages(gfp_mask, order);
				347	if (pages) {
				348	unsigned int count, i;
				349	pages->mapping = NULL;
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	350	set_page_private(pages, order);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	351	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	352	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	353	SetPageReserved(pages + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	354	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	355
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	356	return pages;
				357	}
				358
				359	static void kimage_free_pages(struct page *page)
				360	{
				361	unsigned int order, count, i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	362
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	363	order = page_private(page);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	364	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	365	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	366	ClearPageReserved(page + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	367	__free_pages(page, order);
				368	}
				369
				370	static void kimage_free_page_list(struct list_head *list)
				371	{
				372	struct list_head pos, next;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	373
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	374	list_for_each_safe(pos, next, list) {
				375	struct page *page;
				376
				377	page = list_entry(pos, struct page, lru);
				378	list_del(&page->lru);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	379	kimage_free_pages(page);
				380	}
				381	}
				382
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	383	static struct page kimage_alloc_normal_control_pages(struct kimage image,
				384	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	385	{
				386	/* Control pages are special, they are the intermediaries
				387	* that are needed while we copy the rest of the pages
				388	* to their final resting place. As such they must
				389	* not conflict with either the destination addresses
				390	* or memory the kernel is already using.
				391	*
				392	* The only case where we really need more than one of
				393	* these are for architectures where we cannot disable
				394	* the MMU and must instead generate an identity mapped
				395	* page table for all of the memory.
				396	*
				397	* At worst this runs in O(N) of the image size.
				398	*/
				399	struct list_head extra_pages;
				400	struct page *pages;
				401	unsigned int count;
				402
				403	count = 1 << order;
				404	INIT_LIST_HEAD(&extra_pages);
				405
				406	/* Loop while I can allocate a page and the page allocated
				407	* is a destination page.
				408	*/
				409	do {
				410	unsigned long pfn, epfn, addr, eaddr;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	411
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	412	pages = kimage_alloc_pages(GFP_KERNEL, order);
				413	if (!pages)
				414	break;
				415	pfn = page_to_pfn(pages);
				416	epfn = pfn + count;
				417	addr = pfn << PAGE_SHIFT;
				418	eaddr = epfn << PAGE_SHIFT;
				419	if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) \|\|
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	420	kimage_is_destination_range(image, addr, eaddr)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	421	list_add(&pages->lru, &extra_pages);
				422	pages = NULL;
				423	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	424	} while (!pages);
				425
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	426	if (pages) {
				427	/* Remember the allocated page... */
				428	list_add(&pages->lru, &image->control_pages);
				429
				430	/* Because the page is already in it's destination
				431	* location we will never allocate another page at
				432	* that address. Therefore kimage_alloc_pages
				433	* will not return it (again) and we don't need
				434	* to give it an entry in image->segment[].
				435	*/
				436	}
				437	/* Deal with the destination pages I have inadvertently allocated.
				438	*
				439	* Ideally I would convert multi-page allocations into single
				440	* page allocations, and add everyting to image->dest_pages.
				441	*
				442	* For now it is simpler to just free the pages.
				443	*/
				444	kimage_free_page_list(&extra_pages);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	445
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	446	return pages;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	447	}
				448
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	449	static struct page kimage_alloc_crash_control_pages(struct kimage image,
				450	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	451	{
				452	/* Control pages are special, they are the intermediaries
				453	* that are needed while we copy the rest of the pages
				454	* to their final resting place. As such they must
				455	* not conflict with either the destination addresses
				456	* or memory the kernel is already using.
				457	*
				458	* Control pages are also the only pags we must allocate
				459	* when loading a crash kernel. All of the other pages
				460	* are specified by the segments and we just memcpy
				461	* into them directly.
				462	*
				463	* The only case where we really need more than one of
				464	* these are for architectures where we cannot disable
				465	* the MMU and must instead generate an identity mapped
				466	* page table for all of the memory.
				467	*
				468	* Given the low demand this implements a very simple
				469	* allocator that finds the first hole of the appropriate
				470	* size in the reserved memory region, and allocates all
				471	* of the memory up to and including the hole.
				472	*/
				473	unsigned long hole_start, hole_end, size;
				474	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	475
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	476	pages = NULL;
				477	size = (1 << order) << PAGE_SHIFT;
				478	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
				479	hole_end = hole_start + size - 1;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	480	while (hole_end <= crashk_res.end) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	481	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	482
				483	if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	484	break;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	485	if (hole_end > crashk_res.end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	486	break;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	487	/* See if I overlap any of the segments */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	488	for (i = 0; i < image->nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	489	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	490
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	491	mstart = image->segment[i].mem;
				492	mend = mstart + image->segment[i].memsz - 1;
				493	if ((hole_end >= mstart) && (hole_start <= mend)) {
				494	/* Advance the hole to the end of the segment */
				495	hole_start = (mend + (size - 1)) & ~(size - 1);
				496	hole_end = hole_start + size - 1;
				497	break;
				498	}
				499	}
				500	/* If I don't overlap any segments I have found my hole! */
				501	if (i == image->nr_segments) {
				502	pages = pfn_to_page(hole_start >> PAGE_SHIFT);
				503	break;
				504	}
				505	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	506	if (pages)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	507	image->control_page = hole_end;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	508
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	509	return pages;
				510	}
				511
				512
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	513	struct page kimage_alloc_control_pages(struct kimage image,
				514	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	515	{
				516	struct page *pages = NULL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	517
				518	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	519	case KEXEC_TYPE_DEFAULT:
				520	pages = kimage_alloc_normal_control_pages(image, order);
				521	break;
				522	case KEXEC_TYPE_CRASH:
				523	pages = kimage_alloc_crash_control_pages(image, order);
				524	break;
				525	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	526
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	527	return pages;
				528	}
				529
				530	static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
				531	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	532	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	533	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	534
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	535	if (image->entry == image->last_entry) {
				536	kimage_entry_t *ind_page;
				537	struct page *page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	538
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	539	page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	540	if (!page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	541	return -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	542
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	543	ind_page = page_address(page);
				544	*image->entry = virt_to_phys(ind_page) \| IND_INDIRECTION;
				545	image->entry = ind_page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	546	image->last_entry = ind_page +
				547	((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	548	}
				549	*image->entry = entry;
				550	image->entry++;
				551	*image->entry = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	552
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	553	return 0;
				554	}
				555
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	556	static int kimage_set_destination(struct kimage *image,
				557	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	558	{
				559	int result;
				560
				561	destination &= PAGE_MASK;
				562	result = kimage_add_entry(image, destination \| IND_DESTINATION);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	563	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	564	image->destination = destination;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	565
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	566	return result;
				567	}
				568
				569
				570	static int kimage_add_page(struct kimage *image, unsigned long page)
				571	{
				572	int result;
				573
				574	page &= PAGE_MASK;
				575	result = kimage_add_entry(image, page \| IND_SOURCE);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	576	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	577	image->destination += PAGE_SIZE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	578
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	579	return result;
				580	}
				581
				582
				583	static void kimage_free_extra_pages(struct kimage *image)
				584	{
				585	/* Walk through and free any extra destination pages I may have */
				586	kimage_free_page_list(&image->dest_pages);
				587
				588	/* Walk through and free any unuseable pages I have cached */
				589	kimage_free_page_list(&image->unuseable_pages);
				590
				591	}
				592	static int kimage_terminate(struct kimage *image)
				593	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	594	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	595	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	596
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	597	*image->entry = IND_DONE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	598
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	599	return 0;
				600	}
				601
				/*
				 * Walk the entry list of @image.  @ptr visits each slot and @entry
				 * holds its value; the walk stops at a zero entry or one tagged
				 * IND_DONE.  After an IND_INDIRECTION entry the walk continues at the
				 * start of the page that entry points to, otherwise at the next slot.
				 * Arguments are parenthesized so any expression can be passed safely.
				 */
				#define for_each_kimage_entry(image, ptr, entry) \
					for ((ptr) = &(image)->head; \
					     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
					     (ptr) = ((entry) & IND_INDIRECTION) ? \
						phys_to_virt((entry) & PAGE_MASK) : (ptr) + 1)
				606
				607	static void kimage_free_entry(kimage_entry_t entry)
				608	{
				609	struct page *page;
				610
				611	page = pfn_to_page(entry >> PAGE_SHIFT);
				612	kimage_free_pages(page);
				613	}
				614
				615	static void kimage_free(struct kimage *image)
				616	{
				617	kimage_entry_t *ptr, entry;
				618	kimage_entry_t ind = 0;
				619
				620	if (!image)
				621	return;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	622
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	623	kimage_free_extra_pages(image);
				624	for_each_kimage_entry(image, ptr, entry) {
				625	if (entry & IND_INDIRECTION) {
				626	/* Free the previous indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	627	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	628	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	629	/* Save this indirection page until we are
				630	* done with it.
				631	*/
				632	ind = entry;
				633	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	634	else if (entry & IND_SOURCE)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	635	kimage_free_entry(entry);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	636	}
				637	/* Free the final indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	638	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	639	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	640
				641	/* Handle any machine specific cleanup */
				642	machine_kexec_cleanup(image);
				643
				644	/* Free the kexec control pages... */
				645	kimage_free_page_list(&image->control_pages);
				646	kfree(image);
				647	}
				648
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	649	static kimage_entry_t kimage_dst_used(struct kimage image,
				650	unsigned long page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	651	{
				652	kimage_entry_t *ptr, entry;
				653	unsigned long destination = 0;
				654
				655	for_each_kimage_entry(image, ptr, entry) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	656	if (entry & IND_DESTINATION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	657	destination = entry & PAGE_MASK;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	658	else if (entry & IND_SOURCE) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	659	if (page == destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	660	return ptr;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	661	destination += PAGE_SIZE;
				662	}
				663	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	664
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	665	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	666	}
				667
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	668	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	669	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	670	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	671	{
				672	/*
				673	* Here we implement safeguards to ensure that a source page
				674	* is not copied to its destination page before the data on
				675	* the destination page is no longer useful.
				676	*
				677	* To do this we maintain the invariant that a source page is
				678	* either its own destination page, or it is not a
				679	* destination page at all.
				680	*
				681	* That is slightly stronger than required, but the proof
				682	* that no problems will not occur is trivial, and the
				683	* implementation is simply to verify.
				684	*
				685	* When allocating all pages normally this algorithm will run
				686	* in O(N) time, but in the worst case it will run in O(N^2)
				687	* time. If the runtime is a problem the data structures can
				688	* be fixed.
				689	*/
				690	struct page *page;
				691	unsigned long addr;
				692
				693	/*
				694	* Walk through the list of destination pages, and see if I
				695	* have a match.
				696	*/
				697	list_for_each_entry(page, &image->dest_pages, lru) {
				698	addr = page_to_pfn(page) << PAGE_SHIFT;
				699	if (addr == destination) {
				700	list_del(&page->lru);
				701	return page;
				702	}
				703	}
				704	page = NULL;
				705	while (1) {
				706	kimage_entry_t *old;
				707
				708	/* Allocate a page, if we run out of memory give up */
				709	page = kimage_alloc_pages(gfp_mask, 0);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	710	if (!page)
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	711	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	712	/* If the page cannot be used file it away */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	713	if (page_to_pfn(page) >
				714	(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	715	list_add(&page->lru, &image->unuseable_pages);
				716	continue;
				717	}
				718	addr = page_to_pfn(page) << PAGE_SHIFT;
				719
				720	/* If it is the destination page we want use it */
				721	if (addr == destination)
				722	break;
				723
				724	/* If the page is not a destination page use it */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	725	if (!kimage_is_destination_range(image, addr,
				726	addr + PAGE_SIZE))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	727	break;
				728
				729	/*
				730	* I know that the page is someones destination page.
				731	* See if there is already a source page for this
				732	* destination page. And if so swap the source pages.
				733	*/
				734	old = kimage_dst_used(image, addr);
				735	if (old) {
				736	/* If so move it */
				737	unsigned long old_addr;
				738	struct page *old_page;
				739
				740	old_addr = *old & PAGE_MASK;
				741	old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
				742	copy_highpage(page, old_page);
				743	old = addr \| (old & ~PAGE_MASK);
				744
				745	/* The old page I have found cannot be a
				746	* destination page, so return it.
				747	*/
				748	addr = old_addr;
				749	page = old_page;
				750	break;
				751	}
				752	else {
				753	/* Place the page on the destination list I
				754	* will use it later.
				755	*/
				756	list_add(&page->lru, &image->dest_pages);
				757	}
				758	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	759
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	760	return page;
				761	}
				762
				763	static int kimage_load_normal_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	764	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	765	{
				766	unsigned long maddr;
				767	unsigned long ubytes, mbytes;
				768	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	769	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	770
				771	result = 0;
				772	buf = segment->buf;
				773	ubytes = segment->bufsz;
				774	mbytes = segment->memsz;
				775	maddr = segment->mem;
				776
				777	result = kimage_set_destination(image, maddr);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	778	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	779	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	780
				781	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	782	struct page *page;
				783	char *ptr;
				784	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	785
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	786	page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
Stephen Hemminger	c80544d	2007-10-18 03:07:05 -0700	[diff] [blame]	787	if (!page) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	788	result = -ENOMEM;
				789	goto out;
				790	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	791	result = kimage_add_page(image, page_to_pfn(page)
				792	<< PAGE_SHIFT);
				793	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	794	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	795
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	796	ptr = kmap(page);
				797	/* Start with a clear page */
				798	memset(ptr, 0, PAGE_SIZE);
				799	ptr += maddr & ~PAGE_MASK;
				800	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	801	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	802	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	803
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	804	uchunk = mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	805	if (uchunk > ubytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	806	uchunk = ubytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	807
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	808	result = copy_from_user(ptr, buf, uchunk);
				809	kunmap(page);
				810	if (result) {
				811	result = (result < 0) ? result : -EIO;
				812	goto out;
				813	}
				814	ubytes -= uchunk;
				815	maddr += mchunk;
				816	buf += mchunk;
				817	mbytes -= mchunk;
				818	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	819	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	820	return result;
				821	}
				822
				823	static int kimage_load_crash_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	824	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	825	{
				826	/* For crash dumps kernels we simply copy the data from
				827	* user space to it's destination.
				828	* We do things a page at a time for the sake of kmap.
				829	*/
				830	unsigned long maddr;
				831	unsigned long ubytes, mbytes;
				832	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	833	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	834
				835	result = 0;
				836	buf = segment->buf;
				837	ubytes = segment->bufsz;
				838	mbytes = segment->memsz;
				839	maddr = segment->mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	840	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	841	struct page *page;
				842	char *ptr;
				843	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	844
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	845	page = pfn_to_page(maddr >> PAGE_SHIFT);
Stephen Hemminger	c80544d	2007-10-18 03:07:05 -0700	[diff] [blame]	846	if (!page) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	847	result = -ENOMEM;
				848	goto out;
				849	}
				850	ptr = kmap(page);
				851	ptr += maddr & ~PAGE_MASK;
				852	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	853	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	854	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	855
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	856	uchunk = mchunk;
				857	if (uchunk > ubytes) {
				858	uchunk = ubytes;
				859	/* Zero the trailing part of the page */
				860	memset(ptr + uchunk, 0, mchunk - uchunk);
				861	}
				862	result = copy_from_user(ptr, buf, uchunk);
Zou Nan hai	a7956113	2006-12-07 09:51:35 -0800	[diff] [blame]	863	kexec_flush_icache_page(page);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	864	kunmap(page);
				865	if (result) {
				866	result = (result < 0) ? result : -EIO;
				867	goto out;
				868	}
				869	ubytes -= uchunk;
				870	maddr += mchunk;
				871	buf += mchunk;
				872	mbytes -= mchunk;
				873	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	874	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	875	return result;
				876	}
				877
				878	static int kimage_load_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	879	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	880	{
				881	int result = -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	882
				883	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	884	case KEXEC_TYPE_DEFAULT:
				885	result = kimage_load_normal_segment(image, segment);
				886	break;
				887	case KEXEC_TYPE_CRASH:
				888	result = kimage_load_crash_segment(image, segment);
				889	break;
				890	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	891
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	892	return result;
				893	}
				894
				895	/*
				896	* Exec Kernel system call: for obvious reasons only root may call it.
				897	*
				898	* This call breaks up into three pieces.
				899	* - A generic part which loads the new kernel from the current
				900	* address space, and very carefully places the data in the
				901	* allocated pages.
				902	*
				903	* - A generic part that interacts with the kernel and tells all of
				904	* the devices to shut down. Preventing on-going dmas, and placing
				905	* the devices in a consistent state so a later kernel can
				906	* reinitialize them.
				907	*
				908	* - A machine specific part that includes the syscall number
				909	* and then copies the image to its final destination. And
				910	* jumps into the image at entry.
				911	*
				912	* kexec does not sync, or unmount filesystems so if you need
				913	* that to happen you need to do that yourself.
				914	*/
/*
 * Currently installed images: kexec_image is the kernel to reboot into,
 * kexec_crash_image is the kernel to switch to if this one crashes.
 */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;

/*
 * Hand-rolled binary mutex, taken and released with xchg().
 * Nobody ever waits on it (contenders simply back off), which is what
 * makes it usable from interrupt context.
 */
static int kexec_lock;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	923
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	924	asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
				925	struct kexec_segment __user *segments,
				926	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	927	{
				928	struct kimage *dest_image, image;
				929	int locked;
				930	int result;
				931
				932	/* We only trust the superuser with rebooting the system. */
				933	if (!capable(CAP_SYS_BOOT))
				934	return -EPERM;
				935
				936	/*
				937	* Verify we have a legal set of flags
				938	* This leaves us room for future extensions.
				939	*/
				940	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
				941	return -EINVAL;
				942
				943	/* Verify we are on the appropriate architecture */
				944	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
				945	((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	946	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	947
				948	/* Put an artificial cap on the number
				949	* of segments passed to kexec_load.
				950	*/
				951	if (nr_segments > KEXEC_SEGMENT_MAX)
				952	return -EINVAL;
				953
				954	image = NULL;
				955	result = 0;
				956
				957	/* Because we write directly to the reserved memory
				958	* region when loading crash kernels we need a mutex here to
				959	* prevent multiple crash kernels from attempting to load
				960	* simultaneously, and to prevent a crash kernel from loading
				961	* over the top of a in use crash kernel.
				962	*
				963	* KISS: always take the mutex.
				964	*/
				965	locked = xchg(&kexec_lock, 1);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	966	if (locked)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	967	return -EBUSY;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	968
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	969	dest_image = &kexec_image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	970	if (flags & KEXEC_ON_CRASH)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	971	dest_image = &kexec_crash_image;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	972	if (nr_segments > 0) {
				973	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	974
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	975	/* Loading another kernel to reboot into */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	976	if ((flags & KEXEC_ON_CRASH) == 0)
				977	result = kimage_normal_alloc(&image, entry,
				978	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	979	/* Loading another kernel to switch to if this one crashes */
				980	else if (flags & KEXEC_ON_CRASH) {
				981	/* Free any current crash dump kernel before
				982	* we corrupt it.
				983	*/
				984	kimage_free(xchg(&kexec_crash_image, NULL));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	985	result = kimage_crash_alloc(&image, entry,
				986	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	987	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	988	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	989	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	990
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	991	result = machine_kexec_prepare(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	992	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	993	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	994
				995	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	996	result = kimage_load_segment(image, &image->segment[i]);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	997	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	998	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	999	}
				1000	result = kimage_terminate(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1001	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1002	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1003	}
				1004	/* Install the new kernel, and Uninstall the old */
				1005	image = xchg(dest_image, image);
				1006
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1007	out:
Roland McGrath	0b4a8a7	2006-09-29 02:00:39 -0700	[diff] [blame]	1008	locked = xchg(&kexec_lock, 0); /* Release the mutex */
				1009	BUG_ON(!locked);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1010	kimage_free(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1011
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1012	return result;
				1013	}
				1014
				1015	#ifdef CONFIG_COMPAT
				1016	asmlinkage long compat_sys_kexec_load(unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1017	unsigned long nr_segments,
				1018	struct compat_kexec_segment __user *segments,
				1019	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1020	{
				1021	struct compat_kexec_segment in;
				1022	struct kexec_segment out, __user *ksegments;
				1023	unsigned long i, result;
				1024
				1025	/* Don't allow clients that don't understand the native
				1026	* architecture to do anything.
				1027	*/
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1028	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1029	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1030
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1031	if (nr_segments > KEXEC_SEGMENT_MAX)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1032	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1033
				1034	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
				1035	for (i=0; i < nr_segments; i++) {
				1036	result = copy_from_user(&in, &segments[i], sizeof(in));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1037	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1038	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1039
				1040	out.buf = compat_ptr(in.buf);
				1041	out.bufsz = in.bufsz;
				1042	out.mem = in.mem;
				1043	out.memsz = in.memsz;
				1044
				1045	result = copy_to_user(&ksegments[i], &out, sizeof(out));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1046	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1047	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1048	}
				1049
				1050	return sys_kexec_load(entry, nr_segments, ksegments, flags);
				1051	}
				1052	#endif
				1053
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	1054	void crash_kexec(struct pt_regs *regs)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1055	{
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1056	int locked;
				1057
				1058
				1059	/* Take the kexec_lock here to prevent sys_kexec_load
				1060	* running on one cpu from replacing the crash kernel
				1061	* we are using after a panic on a different cpu.
				1062	*
				1063	* If the crash kernel was not located in a fixed area
				1064	* of memory the xchg(&kexec_crash_image) would be
				1065	* sufficient. But since I reuse the memory...
				1066	*/
				1067	locked = xchg(&kexec_lock, 1);
				1068	if (!locked) {
David Wilder	c0ce7d0	2006-06-23 15:29:34 -0700	[diff] [blame]	1069	if (kexec_crash_image) {
Vivek Goyal	e996e58	2006-01-09 20:51:44 -0800	[diff] [blame]	1070	struct pt_regs fixed_regs;
				1071	crash_setup_regs(&fixed_regs, regs);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1072	crash_save_vmcoreinfo();
Vivek Goyal	e996e58	2006-01-09 20:51:44 -0800	[diff] [blame]	1073	machine_crash_shutdown(&fixed_regs);
David Wilder	c0ce7d0	2006-06-23 15:29:34 -0700	[diff] [blame]	1074	machine_kexec(kexec_crash_image);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1075	}
Roland McGrath	0b4a8a7	2006-09-29 02:00:39 -0700	[diff] [blame]	1076	locked = xchg(&kexec_lock, 0);
				1077	BUG_ON(!locked);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1078	}
				1079	}
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	1080
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	1081	static u32 append_elf_note(u32 buf, char name, unsigned type, void data,
				1082	size_t data_len)
				1083	{
				1084	struct elf_note note;
				1085
				1086	note.n_namesz = strlen(name) + 1;
				1087	note.n_descsz = data_len;
				1088	note.n_type = type;
				1089	memcpy(buf, &note, sizeof(note));
				1090	buf += (sizeof(note) + 3)/4;
				1091	memcpy(buf, name, note.n_namesz);
				1092	buf += (note.n_namesz + 3)/4;
				1093	memcpy(buf, data, note.n_descsz);
				1094	buf += (note.n_descsz + 3)/4;
				1095
				1096	return buf;
				1097	}
				1098
				1099	static void final_note(u32 *buf)
				1100	{
				1101	struct elf_note note;
				1102
				1103	note.n_namesz = 0;
				1104	note.n_descsz = 0;
				1105	note.n_type = 0;
				1106	memcpy(buf, &note, sizeof(note));
				1107	}
				1108
				1109	void crash_save_cpu(struct pt_regs *regs, int cpu)
				1110	{
				1111	struct elf_prstatus prstatus;
				1112	u32 *buf;
				1113
				1114	if ((cpu < 0) \|\| (cpu >= NR_CPUS))
				1115	return;
				1116
				1117	/* Using ELF notes here is opportunistic.
				1118	* I need a well defined structure format
				1119	* for the data I pass, and I need tags
				1120	* on the data to indicate what information I have
				1121	* squirrelled away. ELF notes happen to provide
				1122	* all of that, so there is no need to invent something new.
				1123	*/
				1124	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
				1125	if (!buf)
				1126	return;
				1127	memset(&prstatus, 0, sizeof(prstatus));
				1128	prstatus.pr_pid = current->pid;
				1129	elf_core_copy_regs(&prstatus.pr_reg, regs);
Simon Horman	6672f76	2007-05-08 00:28:22 -0700	[diff] [blame]	1130	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				1131	&prstatus, sizeof(prstatus));
Magnus Damm	85916f8	2006-12-06 20:40:41 -0800	[diff] [blame]	1132	final_note(buf);
				1133	}
				1134
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	1135	static int __init crash_notes_memory_init(void)
				1136	{
				1137	/* Allocate memory for saving cpu registers. */
				1138	crash_notes = alloc_percpu(note_buf_t);
				1139	if (!crash_notes) {
				1140	printk("Kexec: Memory allocation for saving cpu register"
				1141	" states failed\n");
				1142	return -ENOMEM;
				1143	}
				1144	return 0;
				1145	}
				1146	module_init(crash_notes_memory_init)
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1147
Bernhard Walle	cba63c3	2007-10-18 23:40:58 -0700	[diff] [blame]	1148
				1149	/*
				1150	* parsing the "crashkernel" commandline
				1151	*
				1152	* this code is intended to be called from architecture specific code
				1153	*/
				1154
				1155
				1156	/*
				1157	* This function parses command lines in the format
				1158	*
				1159	* crashkernel=ramsize-range:size[,...][@offset]
				1160	*
				1161	* The function returns 0 on success and -EINVAL on failure.
				1162	*/
				1163	static int __init parse_crashkernel_mem(char *cmdline,
				1164	unsigned long long system_ram,
				1165	unsigned long long *crash_size,
				1166	unsigned long long *crash_base)
				1167	{
				1168	char cur = cmdline, tmp;
				1169
				1170	/* for each entry of the comma-separated list */
				1171	do {
				1172	unsigned long long start, end = ULLONG_MAX, size;
				1173
				1174	/* get the start of the range */
				1175	start = memparse(cur, &tmp);
				1176	if (cur == tmp) {
				1177	pr_warning("crashkernel: Memory value expected\n");
				1178	return -EINVAL;
				1179	}
				1180	cur = tmp;
				1181	if (*cur != '-') {
				1182	pr_warning("crashkernel: '-' expected\n");
				1183	return -EINVAL;
				1184	}
				1185	cur++;
				1186
				1187	/* if no ':' is here, than we read the end */
				1188	if (*cur != ':') {
				1189	end = memparse(cur, &tmp);
				1190	if (cur == tmp) {
				1191	pr_warning("crashkernel: Memory "
				1192	"value expected\n");
				1193	return -EINVAL;
				1194	}
				1195	cur = tmp;
				1196	if (end <= start) {
				1197	pr_warning("crashkernel: end <= start\n");
				1198	return -EINVAL;
				1199	}
				1200	}
				1201
				1202	if (*cur != ':') {
				1203	pr_warning("crashkernel: ':' expected\n");
				1204	return -EINVAL;
				1205	}
				1206	cur++;
				1207
				1208	size = memparse(cur, &tmp);
				1209	if (cur == tmp) {
				1210	pr_warning("Memory value expected\n");
				1211	return -EINVAL;
				1212	}
				1213	cur = tmp;
				1214	if (size >= system_ram) {
				1215	pr_warning("crashkernel: invalid size\n");
				1216	return -EINVAL;
				1217	}
				1218
				1219	/* match ? */
				1220	if (system_ram >= start && system_ram <= end) {
				1221	*crash_size = size;
				1222	break;
				1223	}
				1224	} while (*cur++ == ',');
				1225
				1226	if (*crash_size > 0) {
				1227	while (cur != ' ' && cur != '@')
				1228	cur++;
				1229	if (*cur == '@') {
				1230	cur++;
				1231	*crash_base = memparse(cur, &tmp);
				1232	if (cur == tmp) {
				1233	pr_warning("Memory value expected "
				1234	"after '@'\n");
				1235	return -EINVAL;
				1236	}
				1237	}
				1238	}
				1239
				1240	return 0;
				1241	}
				1242
				1243	/*
				1244	* That function parses "simple" (old) crashkernel command lines like
				1245	*
				1246	* crashkernel=size[@offset]
				1247	*
				1248	* It returns 0 on success and -EINVAL on failure.
				1249	*/
				1250	static int __init parse_crashkernel_simple(char *cmdline,
				1251	unsigned long long *crash_size,
				1252	unsigned long long *crash_base)
				1253	{
				1254	char *cur = cmdline;
				1255
				1256	*crash_size = memparse(cmdline, &cur);
				1257	if (cmdline == cur) {
				1258	pr_warning("crashkernel: memory value expected\n");
				1259	return -EINVAL;
				1260	}
				1261
				1262	if (*cur == '@')
				1263	*crash_base = memparse(cur+1, &cur);
				1264
				1265	return 0;
				1266	}
				1267
				1268	/*
				1269	* That function is the entry point for command line parsing and should be
				1270	* called from the arch-specific code.
				1271	*/
				1272	int __init parse_crashkernel(char *cmdline,
				1273	unsigned long long system_ram,
				1274	unsigned long long *crash_size,
				1275	unsigned long long *crash_base)
				1276	{
				1277	char p = cmdline, ck_cmdline = NULL;
				1278	char first_colon, first_space;
				1279
				1280	BUG_ON(!crash_size \|\| !crash_base);
				1281	*crash_size = 0;
				1282	*crash_base = 0;
				1283
				1284	/* find crashkernel and use the last one if there are more */
				1285	p = strstr(p, "crashkernel=");
				1286	while (p) {
				1287	ck_cmdline = p;
				1288	p = strstr(p+1, "crashkernel=");
				1289	}
				1290
				1291	if (!ck_cmdline)
				1292	return -EINVAL;
				1293
				1294	ck_cmdline += 12; /* strlen("crashkernel=") */
				1295
				1296	/*
				1297	* if the commandline contains a ':', then that's the extended
				1298	* syntax -- if not, it must be the classic syntax
				1299	*/
				1300	first_colon = strchr(ck_cmdline, ':');
				1301	first_space = strchr(ck_cmdline, ' ');
				1302	if (first_colon && (!first_space \|\| first_colon < first_space))
				1303	return parse_crashkernel_mem(ck_cmdline, system_ram,
				1304	crash_size, crash_base);
				1305	else
				1306	return parse_crashkernel_simple(ck_cmdline, crash_size,
				1307	crash_base);
				1308
				1309	return 0;
				1310	}
				1311
				1312
				1313
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1314	void crash_save_vmcoreinfo(void)
				1315	{
				1316	u32 *buf;
				1317
				1318	if (!vmcoreinfo_size)
				1319	return;
				1320
Ken'ichi Ohmichi	d768281	2007-10-16 23:27:28 -0700	[diff] [blame]	1321	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1322
				1323	buf = (u32 *)vmcoreinfo_note;
				1324
				1325	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
				1326	vmcoreinfo_size);
				1327
				1328	final_note(buf);
				1329	}
				1330
				1331	void vmcoreinfo_append_str(const char *fmt, ...)
				1332	{
				1333	va_list args;
				1334	char buf[0x50];
				1335	int r;
				1336
				1337	va_start(args, fmt);
				1338	r = vsnprintf(buf, sizeof(buf), fmt, args);
				1339	va_end(args);
				1340
				1341	if (r + vmcoreinfo_size > vmcoreinfo_max_size)
				1342	r = vmcoreinfo_max_size - vmcoreinfo_size;
				1343
				1344	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
				1345
				1346	vmcoreinfo_size += r;
				1347	}
				1348
				1349	/*
				1350	* provide an empty default implementation here -- architecture
				1351	* code may override this
				1352	*/
				1353	void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
				1354	{}
				1355
				1356	unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
				1357	{
				1358	return __pa((unsigned long)(char *)&vmcoreinfo_note);
				1359	}
				1360
				1361	static int __init crash_save_vmcoreinfo_init(void)
				1362	{
Ken'ichi Ohmichi	bba1f60	2008-02-07 00:15:22 -0800	[diff] [blame]	1363	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
				1364	VMCOREINFO_PAGESIZE(PAGE_SIZE);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1365
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1366	VMCOREINFO_SYMBOL(init_uts_ns);
				1367	VMCOREINFO_SYMBOL(node_online_map);
				1368	VMCOREINFO_SYMBOL(swapper_pg_dir);
				1369	VMCOREINFO_SYMBOL(_stext);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1370
				1371	#ifndef CONFIG_NEED_MULTIPLE_NODES
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1372	VMCOREINFO_SYMBOL(mem_map);
				1373	VMCOREINFO_SYMBOL(contig_page_data);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1374	#endif
				1375	#ifdef CONFIG_SPARSEMEM
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1376	VMCOREINFO_SYMBOL(mem_section);
				1377	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
Ken'ichi Ohmichi	c76f860	2008-02-07 00:15:20 -0800	[diff] [blame]	1378	VMCOREINFO_STRUCT_SIZE(mem_section);
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1379	VMCOREINFO_OFFSET(mem_section, section_mem_map);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1380	#endif
Ken'ichi Ohmichi	c76f860	2008-02-07 00:15:20 -0800	[diff] [blame]	1381	VMCOREINFO_STRUCT_SIZE(page);
				1382	VMCOREINFO_STRUCT_SIZE(pglist_data);
				1383	VMCOREINFO_STRUCT_SIZE(zone);
				1384	VMCOREINFO_STRUCT_SIZE(free_area);
				1385	VMCOREINFO_STRUCT_SIZE(list_head);
				1386	VMCOREINFO_SIZE(nodemask_t);
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1387	VMCOREINFO_OFFSET(page, flags);
				1388	VMCOREINFO_OFFSET(page, _count);
				1389	VMCOREINFO_OFFSET(page, mapping);
				1390	VMCOREINFO_OFFSET(page, lru);
				1391	VMCOREINFO_OFFSET(pglist_data, node_zones);
				1392	VMCOREINFO_OFFSET(pglist_data, nr_zones);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1393	#ifdef CONFIG_FLAT_NODE_MEM_MAP
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1394	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1395	#endif
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1396	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
				1397	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
				1398	VMCOREINFO_OFFSET(pglist_data, node_id);
				1399	VMCOREINFO_OFFSET(zone, free_area);
				1400	VMCOREINFO_OFFSET(zone, vm_stat);
				1401	VMCOREINFO_OFFSET(zone, spanned_pages);
				1402	VMCOREINFO_OFFSET(free_area, free_list);
				1403	VMCOREINFO_OFFSET(list_head, next);
				1404	VMCOREINFO_OFFSET(list_head, prev);
				1405	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
Ken'ichi Ohmichi	83a08e7	2008-01-08 15:33:05 -0800	[diff] [blame]	1406	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
Ken'ichi Ohmichi	bcbba6c	2007-10-16 23:27:30 -0700	[diff] [blame]	1407	VMCOREINFO_NUMBER(NR_FREE_PAGES);
Ken'ichi Ohmichi	122c7a5	2008-04-28 02:13:04 -0700	[diff] [blame^]	1408	VMCOREINFO_NUMBER(PG_lru);
				1409	VMCOREINFO_NUMBER(PG_private);
				1410	VMCOREINFO_NUMBER(PG_swapcache);
Ken'ichi Ohmichi	fd59d23	2007-10-16 23:27:27 -0700	[diff] [blame]	1411
				1412	arch_crash_save_vmcoreinfo();
				1413
				1414	return 0;
				1415	}
				1416
				1417	module_init(crash_save_vmcoreinfo_init)