Blame - kernel/kexec_core.c - kernel/msm-4.9

blob: f5ab72ebda1134a398748ae5907c585efc85d4b9 [file] [log] [blame]

Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1	/*
				2	* kexec.c - kexec system call core code.
				3	* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
				4	*
				5	* This source code is licensed under the GNU General Public License,
				6	* Version 2. See the file COPYING for more details.
				7	*/
				8
Minfei Huang	de90a6b	2015-11-06 16:32:45 -0800	[diff] [blame]	9	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	10
				11	#include <linux/capability.h>
				12	#include <linux/mm.h>
				13	#include <linux/file.h>
				14	#include <linux/slab.h>
				15	#include <linux/fs.h>
				16	#include <linux/kexec.h>
				17	#include <linux/mutex.h>
				18	#include <linux/list.h>
				19	#include <linux/highmem.h>
				20	#include <linux/syscalls.h>
				21	#include <linux/reboot.h>
				22	#include <linux/ioport.h>
				23	#include <linux/hardirq.h>
				24	#include <linux/elf.h>
				25	#include <linux/elfcore.h>
				26	#include <linux/utsname.h>
				27	#include <linux/numa.h>
				28	#include <linux/suspend.h>
				29	#include <linux/device.h>
				30	#include <linux/freezer.h>
				31	#include <linux/pm.h>
				32	#include <linux/cpu.h>
				33	#include <linux/uaccess.h>
				34	#include <linux/io.h>
				35	#include <linux/console.h>
				36	#include <linux/vmalloc.h>
				37	#include <linux/swap.h>
				38	#include <linux/syscore_ops.h>
				39	#include <linux/compiler.h>
				40	#include <linux/hugetlb.h>
Josh Poimboeuf	935893a	2017-06-28 10:11:06 -0500	[diff] [blame]	41	#include <linux/frame.h>
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	42
				43	#include <asm/page.h>
				44	#include <asm/sections.h>
				45
				46	#include <crypto/hash.h>
				47	#include <crypto/sha.h>
				48	#include "kexec_internal.h"
				49
				50	DEFINE_MUTEX(kexec_mutex);
				51
				52	/* Per cpu memory for storing cpu states in case of system crash. */
				53	note_buf_t __percpu *crash_notes;
				54
				55	/* vmcoreinfo stuff */
				56	static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
				57	u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
				58	size_t vmcoreinfo_size;
				59	size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
				60
				61	/* Flag to indicate we are going to kexec a new kernel */
				62	bool kexec_in_progress = false;
				63
				64
				65	/* Location of the reserved area for the crash kernel */
				66	struct resource crashk_res = {
				67	.name = "Crash kernel",
				68	.start = 0,
				69	.end = 0,
Toshi Kani	1a085d0	2016-01-26 21:57:23 +0100	[diff] [blame]	70	.flags = IORESOURCE_BUSY \| IORESOURCE_SYSTEM_RAM,
				71	.desc = IORES_DESC_CRASH_KERNEL
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	72	};
				73	struct resource crashk_low_res = {
				74	.name = "Crash kernel",
				75	.start = 0,
				76	.end = 0,
Toshi Kani	1a085d0	2016-01-26 21:57:23 +0100	[diff] [blame]	77	.flags = IORESOURCE_BUSY \| IORESOURCE_SYSTEM_RAM,
				78	.desc = IORES_DESC_CRASH_KERNEL
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	79	};
				80
				81	int kexec_should_crash(struct task_struct *p)
				82	{
				83	/*
				84	* If crash_kexec_post_notifiers is enabled, don't run
				85	* crash_kexec() here yet, which must be run after panic
				86	* notifiers in panic().
				87	*/
				88	if (crash_kexec_post_notifiers)
				89	return 0;
				90	/*
				91	* There are 4 panic() calls in do_exit() path, each of which
				92	* corresponds to each of these 4 conditions.
				93	*/
				94	if (in_interrupt() \|\| !p->pid \|\| is_global_init(p) \|\| panic_on_oops)
				95	return 1;
				96	return 0;
				97	}
				98
Petr Tesarik	21db79e	2016-08-02 14:06:16 -0700	[diff] [blame]	99	int kexec_crash_loaded(void)
				100	{
				101	return !!kexec_crash_image;
				102	}
				103	EXPORT_SYMBOL_GPL(kexec_crash_loaded);
				104
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	105	/*
				106	* When kexec transitions to the new kernel there is a one-to-one
				107	* mapping between physical and virtual addresses. On processors
				108	* where you can disable the MMU this is trivial, and easy. For
				109	* others it is still a simple predictable page table to setup.
				110	*
				111	* In that environment kexec copies the new kernel to its final
				112	* resting place. This means I can only support memory whose
				113	* physical address can fit in an unsigned long. In particular
				114	* addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
				115	* If the assembly stub has more restrictive requirements
				116	* KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
				117	* defined more restrictively in <asm/kexec.h>.
				118	*
				119	* The code for the transition from the current kernel to the
				120	* new kernel is placed in the control_code_buffer, whose size
				121	* is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
				122	* page of memory is necessary, but some architectures require more.
				123	* Because this memory must be identity mapped in the transition from
				124	* virtual to physical addresses it must live in the range
				125	* 0 - TASK_SIZE, as only the user space mappings are arbitrarily
				126	* modifiable.
				127	*
				128	* The assembly stub in the control code buffer is passed a linked list
				129	* of descriptor pages detailing the source pages of the new kernel,
				130	* and the destination addresses of those source pages. As this data
				131	* structure is not used in the context of the current OS, it must
				132	* be self-contained.
				133	*
				134	* The code has been made to work with highmem pages and will use a
				135	* destination page in its final resting place (if it happens
				136	* to allocate it). The end product of this is that most of the
				137	* physical address space, and most of RAM can be used.
				138	*
				139	* Future directions include:
				140	* - allocating a page table with the control code buffer identity
				141	* mapped, to simplify machine_kexec and make kexec_on_panic more
				142	* reliable.
				143	*/
				144
				145	/*
				146	* KIMAGE_NO_DEST is an impossible destination address..., for
				147	* allocating pages whose destination address we do not care about.
				148	*/
				149	#define KIMAGE_NO_DEST (-1UL)
zhong jiang	1730f14	2016-08-02 14:06:22 -0700	[diff] [blame]	150	#define PAGE_COUNT(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	151
				152	static struct page kimage_alloc_page(struct kimage image,
				153	gfp_t gfp_mask,
				154	unsigned long dest);
				155
				156	int sanity_check_segment_list(struct kimage *image)
				157	{
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	158	int i;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	159	unsigned long nr_segments = image->nr_segments;
zhong jiang	1730f14	2016-08-02 14:06:22 -0700	[diff] [blame]	160	unsigned long total_pages = 0;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	161
				162	/*
				163	* Verify we have good destination addresses. The caller is
				164	* responsible for making certain we don't attempt to load
				165	* the new image into invalid or reserved areas of RAM. This
				166	* just verifies it is an address we can use.
				167	*
				168	* Since the kernel does everything in page size chunks ensure
				169	* the destination addresses are page aligned. Too many
				170	* special cases crop of when we don't do this. The most
				171	* insidious is getting overlapping destination addresses
				172	* simply because addresses are changed to page size
				173	* granularity.
				174	*/
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	175	for (i = 0; i < nr_segments; i++) {
				176	unsigned long mstart, mend;
				177
				178	mstart = image->segment[i].mem;
				179	mend = mstart + image->segment[i].memsz;
Russell King	465d377	2016-08-02 14:05:57 -0700	[diff] [blame]	180	if (mstart > mend)
				181	return -EADDRNOTAVAIL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	182	if ((mstart & ~PAGE_MASK) \|\| (mend & ~PAGE_MASK))
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	183	return -EADDRNOTAVAIL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	184	if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	185	return -EADDRNOTAVAIL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	186	}
				187
				188	/* Verify our destination addresses do not overlap.
				189	* If we alloed overlapping destination addresses
				190	* through very weird things can happen with no
				191	* easy explanation as one segment stops on another.
				192	*/
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	193	for (i = 0; i < nr_segments; i++) {
				194	unsigned long mstart, mend;
				195	unsigned long j;
				196
				197	mstart = image->segment[i].mem;
				198	mend = mstart + image->segment[i].memsz;
				199	for (j = 0; j < i; j++) {
				200	unsigned long pstart, pend;
				201
				202	pstart = image->segment[j].mem;
				203	pend = pstart + image->segment[j].memsz;
				204	/* Do the segments overlap ? */
				205	if ((mend > pstart) && (mstart < pend))
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	206	return -EINVAL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	207	}
				208	}
				209
				210	/* Ensure our buffer sizes are strictly less than
				211	* our memory sizes. This should always be the case,
				212	* and it is easier to check up front than to be surprised
				213	* later on.
				214	*/
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	215	for (i = 0; i < nr_segments; i++) {
				216	if (image->segment[i].bufsz > image->segment[i].memsz)
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	217	return -EINVAL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	218	}
				219
				220	/*
zhong jiang	1730f14	2016-08-02 14:06:22 -0700	[diff] [blame]	221	* Verify that no more than half of memory will be consumed. If the
				222	* request from userspace is too large, a large amount of time will be
				223	* wasted allocating pages, which can cause a soft lockup.
				224	*/
				225	for (i = 0; i < nr_segments; i++) {
				226	if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2)
				227	return -EINVAL;
				228
				229	total_pages += PAGE_COUNT(image->segment[i].memsz);
				230	}
				231
				232	if (total_pages > totalram_pages / 2)
				233	return -EINVAL;
				234
				235	/*
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	236	* Verify we have good destination addresses. Normally
				237	* the caller is responsible for making certain we don't
				238	* attempt to load the new image into invalid or reserved
				239	* areas of RAM. But crash kernels are preloaded into a
				240	* reserved area of ram. We must ensure the addresses
				241	* are in the reserved area otherwise preloading the
				242	* kernel could corrupt things.
				243	*/
				244
				245	if (image->type == KEXEC_TYPE_CRASH) {
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	246	for (i = 0; i < nr_segments; i++) {
				247	unsigned long mstart, mend;
				248
				249	mstart = image->segment[i].mem;
				250	mend = mstart + image->segment[i].memsz - 1;
				251	/* Ensure we are within the crash kernel limits */
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	252	if ((mstart < phys_to_boot_phys(crashk_res.start)) \|\|
				253	(mend > phys_to_boot_phys(crashk_res.end)))
Minfei Huang	4caf961	2016-08-02 14:05:45 -0700	[diff] [blame]	254	return -EADDRNOTAVAIL;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	255	}
				256	}
				257
				258	return 0;
				259	}
				260
				261	struct kimage *do_kimage_alloc_init(void)
				262	{
				263	struct kimage *image;
				264
				265	/* Allocate a controlling structure */
				266	image = kzalloc(sizeof(*image), GFP_KERNEL);
				267	if (!image)
				268	return NULL;
				269
				270	image->head = 0;
				271	image->entry = &image->head;
				272	image->last_entry = &image->head;
				273	image->control_page = ~0; /* By default this does not apply */
				274	image->type = KEXEC_TYPE_DEFAULT;
				275
				276	/* Initialize the list of control pages */
				277	INIT_LIST_HEAD(&image->control_pages);
				278
				279	/* Initialize the list of destination pages */
				280	INIT_LIST_HEAD(&image->dest_pages);
				281
				282	/* Initialize the list of unusable pages */
				283	INIT_LIST_HEAD(&image->unusable_pages);
				284
				285	return image;
				286	}
				287
				288	int kimage_is_destination_range(struct kimage *image,
				289	unsigned long start,
				290	unsigned long end)
				291	{
				292	unsigned long i;
				293
				294	for (i = 0; i < image->nr_segments; i++) {
				295	unsigned long mstart, mend;
				296
				297	mstart = image->segment[i].mem;
				298	mend = mstart + image->segment[i].memsz;
				299	if ((end > mstart) && (start < mend))
				300	return 1;
				301	}
				302
				303	return 0;
				304	}
				305
				306	static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
				307	{
				308	struct page *pages;
				309
				310	pages = alloc_pages(gfp_mask, order);
				311	if (pages) {
				312	unsigned int count, i;
				313
				314	pages->mapping = NULL;
				315	set_page_private(pages, order);
				316	count = 1 << order;
				317	for (i = 0; i < count; i++)
				318	SetPageReserved(pages + i);
				319	}
				320
				321	return pages;
				322	}
				323
				324	static void kimage_free_pages(struct page *page)
				325	{
				326	unsigned int order, count, i;
				327
				328	order = page_private(page);
				329	count = 1 << order;
				330	for (i = 0; i < count; i++)
				331	ClearPageReserved(page + i);
				332	__free_pages(page, order);
				333	}
				334
				335	void kimage_free_page_list(struct list_head *list)
				336	{
Geliang Tang	2b24692	2016-01-20 15:00:34 -0800	[diff] [blame]	337	struct page page, next;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	338
Geliang Tang	2b24692	2016-01-20 15:00:34 -0800	[diff] [blame]	339	list_for_each_entry_safe(page, next, list, lru) {
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	340	list_del(&page->lru);
				341	kimage_free_pages(page);
				342	}
				343	}
				344
				345	static struct page kimage_alloc_normal_control_pages(struct kimage image,
				346	unsigned int order)
				347	{
				348	/* Control pages are special, they are the intermediaries
				349	* that are needed while we copy the rest of the pages
				350	* to their final resting place. As such they must
				351	* not conflict with either the destination addresses
				352	* or memory the kernel is already using.
				353	*
				354	* The only case where we really need more than one of
				355	* these are for architectures where we cannot disable
				356	* the MMU and must instead generate an identity mapped
				357	* page table for all of the memory.
				358	*
				359	* At worst this runs in O(N) of the image size.
				360	*/
				361	struct list_head extra_pages;
				362	struct page *pages;
				363	unsigned int count;
				364
				365	count = 1 << order;
				366	INIT_LIST_HEAD(&extra_pages);
				367
				368	/* Loop while I can allocate a page and the page allocated
				369	* is a destination page.
				370	*/
				371	do {
				372	unsigned long pfn, epfn, addr, eaddr;
				373
				374	pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
				375	if (!pages)
				376	break;
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	377	pfn = page_to_boot_pfn(pages);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	378	epfn = pfn + count;
				379	addr = pfn << PAGE_SHIFT;
				380	eaddr = epfn << PAGE_SHIFT;
				381	if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) \|\|
				382	kimage_is_destination_range(image, addr, eaddr)) {
				383	list_add(&pages->lru, &extra_pages);
				384	pages = NULL;
				385	}
				386	} while (!pages);
				387
				388	if (pages) {
				389	/* Remember the allocated page... */
				390	list_add(&pages->lru, &image->control_pages);
				391
				392	/* Because the page is already in it's destination
				393	* location we will never allocate another page at
				394	* that address. Therefore kimage_alloc_pages
				395	* will not return it (again) and we don't need
				396	* to give it an entry in image->segment[].
				397	*/
				398	}
				399	/* Deal with the destination pages I have inadvertently allocated.
				400	*
				401	* Ideally I would convert multi-page allocations into single
				402	* page allocations, and add everything to image->dest_pages.
				403	*
				404	* For now it is simpler to just free the pages.
				405	*/
				406	kimage_free_page_list(&extra_pages);
				407
				408	return pages;
				409	}
				410
				411	static struct page kimage_alloc_crash_control_pages(struct kimage image,
				412	unsigned int order)
				413	{
				414	/* Control pages are special, they are the intermediaries
				415	* that are needed while we copy the rest of the pages
				416	* to their final resting place. As such they must
				417	* not conflict with either the destination addresses
				418	* or memory the kernel is already using.
				419	*
				420	* Control pages are also the only pags we must allocate
				421	* when loading a crash kernel. All of the other pages
				422	* are specified by the segments and we just memcpy
				423	* into them directly.
				424	*
				425	* The only case where we really need more than one of
				426	* these are for architectures where we cannot disable
				427	* the MMU and must instead generate an identity mapped
				428	* page table for all of the memory.
				429	*
				430	* Given the low demand this implements a very simple
				431	* allocator that finds the first hole of the appropriate
				432	* size in the reserved memory region, and allocates all
				433	* of the memory up to and including the hole.
				434	*/
				435	unsigned long hole_start, hole_end, size;
				436	struct page *pages;
				437
				438	pages = NULL;
				439	size = (1 << order) << PAGE_SHIFT;
				440	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
				441	hole_end = hole_start + size - 1;
				442	while (hole_end <= crashk_res.end) {
				443	unsigned long i;
				444
				445	if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
				446	break;
				447	/* See if I overlap any of the segments */
				448	for (i = 0; i < image->nr_segments; i++) {
				449	unsigned long mstart, mend;
				450
				451	mstart = image->segment[i].mem;
				452	mend = mstart + image->segment[i].memsz - 1;
				453	if ((hole_end >= mstart) && (hole_start <= mend)) {
				454	/* Advance the hole to the end of the segment */
				455	hole_start = (mend + (size - 1)) & ~(size - 1);
				456	hole_end = hole_start + size - 1;
				457	break;
				458	}
				459	}
				460	/* If I don't overlap any segments I have found my hole! */
				461	if (i == image->nr_segments) {
				462	pages = pfn_to_page(hole_start >> PAGE_SHIFT);
Minfei Huang	04e9949	2015-09-09 15:38:58 -0700	[diff] [blame]	463	image->control_page = hole_end;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	464	break;
				465	}
				466	}
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	467
				468	return pages;
				469	}
				470
				471
				472	struct page kimage_alloc_control_pages(struct kimage image,
				473	unsigned int order)
				474	{
				475	struct page *pages = NULL;
				476
				477	switch (image->type) {
				478	case KEXEC_TYPE_DEFAULT:
				479	pages = kimage_alloc_normal_control_pages(image, order);
				480	break;
				481	case KEXEC_TYPE_CRASH:
				482	pages = kimage_alloc_crash_control_pages(image, order);
				483	break;
				484	}
				485
				486	return pages;
				487	}
				488
				489	static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
				490	{
				491	if (*image->entry != 0)
				492	image->entry++;
				493
				494	if (image->entry == image->last_entry) {
				495	kimage_entry_t *ind_page;
				496	struct page *page;
				497
				498	page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
				499	if (!page)
				500	return -ENOMEM;
				501
				502	ind_page = page_address(page);
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	503	*image->entry = virt_to_boot_phys(ind_page) \| IND_INDIRECTION;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	504	image->entry = ind_page;
				505	image->last_entry = ind_page +
				506	((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
				507	}
				508	*image->entry = entry;
				509	image->entry++;
				510	*image->entry = 0;
				511
				512	return 0;
				513	}
				514
				515	static int kimage_set_destination(struct kimage *image,
				516	unsigned long destination)
				517	{
				518	int result;
				519
				520	destination &= PAGE_MASK;
				521	result = kimage_add_entry(image, destination \| IND_DESTINATION);
				522
				523	return result;
				524	}
				525
				526
				527	static int kimage_add_page(struct kimage *image, unsigned long page)
				528	{
				529	int result;
				530
				531	page &= PAGE_MASK;
				532	result = kimage_add_entry(image, page \| IND_SOURCE);
				533
				534	return result;
				535	}
				536
				537
				538	static void kimage_free_extra_pages(struct kimage *image)
				539	{
				540	/* Walk through and free any extra destination pages I may have */
				541	kimage_free_page_list(&image->dest_pages);
				542
				543	/* Walk through and free any unusable pages I have cached */
				544	kimage_free_page_list(&image->unusable_pages);
				545
				546	}
				547	void kimage_terminate(struct kimage *image)
				548	{
				549	if (*image->entry != 0)
				550	image->entry++;
				551
				552	*image->entry = IND_DONE;
				553	}
				554
				555	#define for_each_kimage_entry(image, ptr, entry) \
				556	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
				557	ptr = (entry & IND_INDIRECTION) ? \
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	558	boot_phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	559
				560	static void kimage_free_entry(kimage_entry_t entry)
				561	{
				562	struct page *page;
				563
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	564	page = boot_pfn_to_page(entry >> PAGE_SHIFT);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	565	kimage_free_pages(page);
				566	}
				567
				568	void kimage_free(struct kimage *image)
				569	{
				570	kimage_entry_t *ptr, entry;
				571	kimage_entry_t ind = 0;
				572
				573	if (!image)
				574	return;
				575
				576	kimage_free_extra_pages(image);
				577	for_each_kimage_entry(image, ptr, entry) {
				578	if (entry & IND_INDIRECTION) {
				579	/* Free the previous indirection page */
				580	if (ind & IND_INDIRECTION)
				581	kimage_free_entry(ind);
				582	/* Save this indirection page until we are
				583	* done with it.
				584	*/
				585	ind = entry;
				586	} else if (entry & IND_SOURCE)
				587	kimage_free_entry(entry);
				588	}
				589	/* Free the final indirection page */
				590	if (ind & IND_INDIRECTION)
				591	kimage_free_entry(ind);
				592
				593	/* Handle any machine specific cleanup */
				594	machine_kexec_cleanup(image);
				595
				596	/* Free the kexec control pages... */
				597	kimage_free_page_list(&image->control_pages);
				598
				599	/*
				600	* Free up any temporary buffers allocated. This might hit if
				601	* error occurred much later after buffer allocation.
				602	*/
				603	if (image->file_mode)
				604	kimage_file_post_load_cleanup(image);
				605
				606	kfree(image);
				607	}
				608
				609	static kimage_entry_t kimage_dst_used(struct kimage image,
				610	unsigned long page)
				611	{
				612	kimage_entry_t *ptr, entry;
				613	unsigned long destination = 0;
				614
				615	for_each_kimage_entry(image, ptr, entry) {
				616	if (entry & IND_DESTINATION)
				617	destination = entry & PAGE_MASK;
				618	else if (entry & IND_SOURCE) {
				619	if (page == destination)
				620	return ptr;
				621	destination += PAGE_SIZE;
				622	}
				623	}
				624
				625	return NULL;
				626	}
				627
				628	static struct page kimage_alloc_page(struct kimage image,
				629	gfp_t gfp_mask,
				630	unsigned long destination)
				631	{
				632	/*
				633	* Here we implement safeguards to ensure that a source page
				634	* is not copied to its destination page before the data on
				635	* the destination page is no longer useful.
				636	*
				637	* To do this we maintain the invariant that a source page is
				638	* either its own destination page, or it is not a
				639	* destination page at all.
				640	*
				641	* That is slightly stronger than required, but the proof
				642	* that no problems will not occur is trivial, and the
				643	* implementation is simply to verify.
				644	*
				645	* When allocating all pages normally this algorithm will run
				646	* in O(N) time, but in the worst case it will run in O(N^2)
				647	* time. If the runtime is a problem the data structures can
				648	* be fixed.
				649	*/
				650	struct page *page;
				651	unsigned long addr;
				652
				653	/*
				654	* Walk through the list of destination pages, and see if I
				655	* have a match.
				656	*/
				657	list_for_each_entry(page, &image->dest_pages, lru) {
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	658	addr = page_to_boot_pfn(page) << PAGE_SHIFT;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	659	if (addr == destination) {
				660	list_del(&page->lru);
				661	return page;
				662	}
				663	}
				664	page = NULL;
				665	while (1) {
				666	kimage_entry_t *old;
				667
				668	/* Allocate a page, if we run out of memory give up */
				669	page = kimage_alloc_pages(gfp_mask, 0);
				670	if (!page)
				671	return NULL;
				672	/* If the page cannot be used file it away */
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	673	if (page_to_boot_pfn(page) >
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	674	(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
				675	list_add(&page->lru, &image->unusable_pages);
				676	continue;
				677	}
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	678	addr = page_to_boot_pfn(page) << PAGE_SHIFT;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	679
				680	/* If it is the destination page we want use it */
				681	if (addr == destination)
				682	break;
				683
				684	/* If the page is not a destination page use it */
				685	if (!kimage_is_destination_range(image, addr,
				686	addr + PAGE_SIZE))
				687	break;
				688
				689	/*
				690	* I know that the page is someones destination page.
				691	* See if there is already a source page for this
				692	* destination page. And if so swap the source pages.
				693	*/
				694	old = kimage_dst_used(image, addr);
				695	if (old) {
				696	/* If so move it */
				697	unsigned long old_addr;
				698	struct page *old_page;
				699
				700	old_addr = *old & PAGE_MASK;
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	701	old_page = boot_pfn_to_page(old_addr >> PAGE_SHIFT);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	702	copy_highpage(page, old_page);
				703	old = addr \| (old & ~PAGE_MASK);
				704
				705	/* The old page I have found cannot be a
				706	* destination page, so return it if it's
				707	* gfp_flags honor the ones passed in.
				708	*/
				709	if (!(gfp_mask & __GFP_HIGHMEM) &&
				710	PageHighMem(old_page)) {
				711	kimage_free_pages(old_page);
				712	continue;
				713	}
				714	addr = old_addr;
				715	page = old_page;
				716	break;
				717	}
				718	/* Place the page on the destination list, to be used later */
				719	list_add(&page->lru, &image->dest_pages);
				720	}
				721
				722	return page;
				723	}
				724
				725	static int kimage_load_normal_segment(struct kimage *image,
				726	struct kexec_segment *segment)
				727	{
				728	unsigned long maddr;
				729	size_t ubytes, mbytes;
				730	int result;
				731	unsigned char __user *buf = NULL;
				732	unsigned char *kbuf = NULL;
				733
				734	result = 0;
				735	if (image->file_mode)
				736	kbuf = segment->kbuf;
				737	else
				738	buf = segment->buf;
				739	ubytes = segment->bufsz;
				740	mbytes = segment->memsz;
				741	maddr = segment->mem;
				742
				743	result = kimage_set_destination(image, maddr);
				744	if (result < 0)
				745	goto out;
				746
				747	while (mbytes) {
				748	struct page *page;
				749	char *ptr;
				750	size_t uchunk, mchunk;
				751
				752	page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
				753	if (!page) {
				754	result = -ENOMEM;
				755	goto out;
				756	}
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	757	result = kimage_add_page(image, page_to_boot_pfn(page)
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	758	<< PAGE_SHIFT);
				759	if (result < 0)
				760	goto out;
				761
				762	ptr = kmap(page);
				763	/* Start with a clear page */
				764	clear_page(ptr);
				765	ptr += maddr & ~PAGE_MASK;
				766	mchunk = min_t(size_t, mbytes,
				767	PAGE_SIZE - (maddr & ~PAGE_MASK));
				768	uchunk = min(ubytes, mchunk);
				769
				770	/* For file based kexec, source pages are in kernel memory */
				771	if (image->file_mode)
				772	memcpy(ptr, kbuf, uchunk);
				773	else
				774	result = copy_from_user(ptr, buf, uchunk);
				775	kunmap(page);
				776	if (result) {
				777	result = -EFAULT;
				778	goto out;
				779	}
				780	ubytes -= uchunk;
				781	maddr += mchunk;
				782	if (image->file_mode)
				783	kbuf += mchunk;
				784	else
				785	buf += mchunk;
				786	mbytes -= mchunk;
				787	}
				788	out:
				789	return result;
				790	}
				791
				792	static int kimage_load_crash_segment(struct kimage *image,
				793	struct kexec_segment *segment)
				794	{
				795	/* For crash dumps kernels we simply copy the data from
				796	* user space to it's destination.
				797	* We do things a page at a time for the sake of kmap.
				798	*/
				799	unsigned long maddr;
				800	size_t ubytes, mbytes;
				801	int result;
				802	unsigned char __user *buf = NULL;
				803	unsigned char *kbuf = NULL;
				804
				805	result = 0;
				806	if (image->file_mode)
				807	kbuf = segment->kbuf;
				808	else
				809	buf = segment->buf;
				810	ubytes = segment->bufsz;
				811	mbytes = segment->memsz;
				812	maddr = segment->mem;
				813	while (mbytes) {
				814	struct page *page;
				815	char *ptr;
				816	size_t uchunk, mchunk;
				817
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	818	page = boot_pfn_to_page(maddr >> PAGE_SHIFT);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	819	if (!page) {
				820	result = -ENOMEM;
				821	goto out;
				822	}
				823	ptr = kmap(page);
				824	ptr += maddr & ~PAGE_MASK;
				825	mchunk = min_t(size_t, mbytes,
				826	PAGE_SIZE - (maddr & ~PAGE_MASK));
				827	uchunk = min(ubytes, mchunk);
				828	if (mchunk > uchunk) {
				829	/* Zero the trailing part of the page */
				830	memset(ptr + uchunk, 0, mchunk - uchunk);
				831	}
				832
				833	/* For file based kexec, source pages are in kernel memory */
				834	if (image->file_mode)
				835	memcpy(ptr, kbuf, uchunk);
				836	else
				837	result = copy_from_user(ptr, buf, uchunk);
				838	kexec_flush_icache_page(page);
				839	kunmap(page);
				840	if (result) {
				841	result = -EFAULT;
				842	goto out;
				843	}
				844	ubytes -= uchunk;
				845	maddr += mchunk;
				846	if (image->file_mode)
				847	kbuf += mchunk;
				848	else
				849	buf += mchunk;
				850	mbytes -= mchunk;
				851	}
				852	out:
				853	return result;
				854	}
				855
				856	int kimage_load_segment(struct kimage *image,
				857	struct kexec_segment *segment)
				858	{
				859	int result = -ENOMEM;
				860
				861	switch (image->type) {
				862	case KEXEC_TYPE_DEFAULT:
				863	result = kimage_load_normal_segment(image, segment);
				864	break;
				865	case KEXEC_TYPE_CRASH:
				866	result = kimage_load_crash_segment(image, segment);
				867	break;
				868	}
				869
				870	return result;
				871	}
				872
/* Image that kernel_kexec() passes to machine_kexec(); NULL when none is loaded. */
struct kimage *kexec_image;
/* Image that __crash_kexec() boots after a crash; NULL when none is loaded. */
struct kimage *kexec_crash_image;
/*
 * NOTE(review): not referenced in this part of the file; presumably a flag
 * that blocks further kexec loads -- confirm against the users.
 */
int kexec_load_disabled;
				876
Hidehiro Kawai	7bbee5c	2015-12-14 11:19:11 +0100	[diff] [blame]	877	/*
				878	* No panic_cpu check version of crash_kexec(). This function is called
				879	* only when panic_cpu holds the current CPU number; this is the only CPU
				880	* which processes crash_kexec routines.
				881	*/
Josh Poimboeuf	935893a	2017-06-28 10:11:06 -0500	[diff] [blame]	882	void __noclone __crash_kexec(struct pt_regs *regs)
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	883	{
				884	/* Take the kexec_mutex here to prevent sys_kexec_load
				885	* running on one cpu from replacing the crash kernel
				886	* we are using after a panic on a different cpu.
				887	*
				888	* If the crash kernel was not located in a fixed area
				889	* of memory the xchg(&kexec_crash_image) would be
				890	* sufficient. But since I reuse the memory...
				891	*/
				892	if (mutex_trylock(&kexec_mutex)) {
				893	if (kexec_crash_image) {
				894	struct pt_regs fixed_regs;
				895
				896	crash_setup_regs(&fixed_regs, regs);
				897	crash_save_vmcoreinfo();
				898	machine_crash_shutdown(&fixed_regs);
				899	machine_kexec(kexec_crash_image);
				900	}
				901	mutex_unlock(&kexec_mutex);
				902	}
				903	}
Josh Poimboeuf	935893a	2017-06-28 10:11:06 -0500	[diff] [blame]	904	STACK_FRAME_NON_STANDARD(__crash_kexec);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	905
Hidehiro Kawai	7bbee5c	2015-12-14 11:19:11 +0100	[diff] [blame]	906	void crash_kexec(struct pt_regs *regs)
				907	{
				908	int old_cpu, this_cpu;
				909
				910	/*
				911	* Only one CPU is allowed to execute the crash_kexec() code as with
				912	* panic(). Otherwise parallel calls of panic() and crash_kexec()
				913	* may stop each other. To exclude them, we use panic_cpu here too.
				914	*/
				915	this_cpu = raw_smp_processor_id();
				916	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
				917	if (old_cpu == PANIC_CPU_INVALID) {
				918	/* This is the 1st CPU which comes here, so go ahead. */
Petr Mladek	cf9b110	2016-05-20 17:00:42 -0700	[diff] [blame]	919	printk_nmi_flush_on_panic();
Hidehiro Kawai	7bbee5c	2015-12-14 11:19:11 +0100	[diff] [blame]	920	__crash_kexec(regs);
				921
				922	/*
				923	* Reset panic_cpu to allow another panic()/crash_kexec()
				924	* call.
				925	*/
				926	atomic_set(&panic_cpu, PANIC_CPU_INVALID);
				927	}
				928	}
				929
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	930	size_t crash_get_memory_size(void)
				931	{
				932	size_t size = 0;
				933
				934	mutex_lock(&kexec_mutex);
				935	if (crashk_res.end != crashk_res.start)
				936	size = resource_size(&crashk_res);
				937	mutex_unlock(&kexec_mutex);
				938	return size;
				939	}
				940
				941	void __weak crash_free_reserved_phys_range(unsigned long begin,
				942	unsigned long end)
				943	{
				944	unsigned long addr;
				945
				946	for (addr = begin; addr < end; addr += PAGE_SIZE)
Russell King	43546d8	2016-08-02 14:06:04 -0700	[diff] [blame]	947	free_reserved_page(boot_pfn_to_page(addr >> PAGE_SHIFT));
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	948	}
				949
				950	int crash_shrink_memory(unsigned long new_size)
				951	{
				952	int ret = 0;
				953	unsigned long start, end;
				954	unsigned long old_size;
				955	struct resource *ram_res;
				956
				957	mutex_lock(&kexec_mutex);
				958
				959	if (kexec_crash_image) {
				960	ret = -ENOENT;
				961	goto unlock;
				962	}
				963	start = crashk_res.start;
				964	end = crashk_res.end;
				965	old_size = (end == 0) ? 0 : end - start + 1;
				966	if (new_size >= old_size) {
				967	ret = (new_size == old_size) ? 0 : -EINVAL;
				968	goto unlock;
				969	}
				970
				971	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
				972	if (!ram_res) {
				973	ret = -ENOMEM;
				974	goto unlock;
				975	}
				976
				977	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
				978	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
				979
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	980	crash_free_reserved_phys_range(end, crashk_res.end);
				981
				982	if ((start == end) && (crashk_res.parent != NULL))
				983	release_resource(&crashk_res);
				984
				985	ram_res->start = end;
				986	ram_res->end = crashk_res.end;
Toshi Kani	1a085d0	2016-01-26 21:57:23 +0100	[diff] [blame]	987	ram_res->flags = IORESOURCE_BUSY \| IORESOURCE_SYSTEM_RAM;
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	988	ram_res->name = "System RAM";
				989
				990	crashk_res.end = end - 1;
				991
				992	insert_resource(&iomem_resource, ram_res);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	993
				994	unlock:
				995	mutex_unlock(&kexec_mutex);
				996	return ret;
				997	}
				998
				999	static u32 append_elf_note(u32 buf, char name, unsigned type, void data,
				1000	size_t data_len)
				1001	{
				1002	struct elf_note note;
				1003
				1004	note.n_namesz = strlen(name) + 1;
				1005	note.n_descsz = data_len;
				1006	note.n_type = type;
				1007	memcpy(buf, &note, sizeof(note));
				1008	buf += (sizeof(note) + 3)/4;
				1009	memcpy(buf, name, note.n_namesz);
				1010	buf += (note.n_namesz + 3)/4;
				1011	memcpy(buf, data, note.n_descsz);
				1012	buf += (note.n_descsz + 3)/4;
				1013
				1014	return buf;
				1015	}
				1016
				1017	static void final_note(u32 *buf)
				1018	{
				1019	struct elf_note note;
				1020
				1021	note.n_namesz = 0;
				1022	note.n_descsz = 0;
				1023	note.n_type = 0;
				1024	memcpy(buf, &note, sizeof(note));
				1025	}
				1026
				1027	void crash_save_cpu(struct pt_regs *regs, int cpu)
				1028	{
				1029	struct elf_prstatus prstatus;
				1030	u32 *buf;
				1031
				1032	if ((cpu < 0) \|\| (cpu >= nr_cpu_ids))
				1033	return;
				1034
				1035	/* Using ELF notes here is opportunistic.
				1036	* I need a well defined structure format
				1037	* for the data I pass, and I need tags
				1038	* on the data to indicate what information I have
				1039	* squirrelled away. ELF notes happen to provide
				1040	* all of that, so there is no need to invent something new.
				1041	*/
				1042	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
				1043	if (!buf)
				1044	return;
				1045	memset(&prstatus, 0, sizeof(prstatus));
				1046	prstatus.pr_pid = current->pid;
				1047	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
				1048	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				1049	&prstatus, sizeof(prstatus));
				1050	final_note(buf);
				1051	}
				1052
				1053	static int __init crash_notes_memory_init(void)
				1054	{
				1055	/* Allocate memory for saving cpu registers. */
Baoquan He	bbb78b8	2015-09-09 15:39:00 -0700	[diff] [blame]	1056	size_t size, align;
				1057
				1058	/*
				1059	* crash_notes could be allocated across 2 vmalloc pages when percpu
				1060	* is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
				1061	* pages are also on 2 continuous physical pages. In this case the
				1062	* 2nd part of crash_notes in 2nd page could be lost since only the
				1063	* starting address and size of crash_notes are exported through sysfs.
				1064	* Here round up the size of crash_notes to the nearest power of two
				1065	* and pass it to __alloc_percpu as align value. This can make sure
				1066	* crash_notes is allocated inside one physical page.
				1067	*/
				1068	size = sizeof(note_buf_t);
				1069	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
				1070
				1071	/*
				1072	* Break compile if size is bigger than PAGE_SIZE since crash_notes
				1073	* definitely will be in 2 pages with that.
				1074	*/
				1075	BUILD_BUG_ON(size > PAGE_SIZE);
				1076
				1077	crash_notes = __alloc_percpu(size, align);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1078	if (!crash_notes) {
Minfei Huang	de90a6b	2015-11-06 16:32:45 -0800	[diff] [blame]	1079	pr_warn("Memory allocation for saving cpu register states failed\n");
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1080	return -ENOMEM;
				1081	}
				1082	return 0;
				1083	}
				1084	subsys_initcall(crash_notes_memory_init);
				1085
				1086
				1087	/*
				1088	* parsing the "crashkernel" commandline
				1089	*
				1090	* this code is intended to be called from architecture specific code
				1091	*/
				1092
				1093
				1094	/*
				1095	* This function parses command lines in the format
				1096	*
				1097	* crashkernel=ramsize-range:size[,...][@offset]
				1098	*
				1099	* The function returns 0 on success and -EINVAL on failure.
				1100	*/
				1101	static int __init parse_crashkernel_mem(char *cmdline,
				1102	unsigned long long system_ram,
				1103	unsigned long long *crash_size,
				1104	unsigned long long *crash_base)
				1105	{
				1106	char cur = cmdline, tmp;
				1107
				1108	/* for each entry of the comma-separated list */
				1109	do {
				1110	unsigned long long start, end = ULLONG_MAX, size;
				1111
				1112	/* get the start of the range */
				1113	start = memparse(cur, &tmp);
				1114	if (cur == tmp) {
				1115	pr_warn("crashkernel: Memory value expected\n");
				1116	return -EINVAL;
				1117	}
				1118	cur = tmp;
				1119	if (*cur != '-') {
				1120	pr_warn("crashkernel: '-' expected\n");
				1121	return -EINVAL;
				1122	}
				1123	cur++;
				1124
				1125	/* if no ':' is here, than we read the end */
				1126	if (*cur != ':') {
				1127	end = memparse(cur, &tmp);
				1128	if (cur == tmp) {
				1129	pr_warn("crashkernel: Memory value expected\n");
				1130	return -EINVAL;
				1131	}
				1132	cur = tmp;
				1133	if (end <= start) {
				1134	pr_warn("crashkernel: end <= start\n");
				1135	return -EINVAL;
				1136	}
				1137	}
				1138
				1139	if (*cur != ':') {
				1140	pr_warn("crashkernel: ':' expected\n");
				1141	return -EINVAL;
				1142	}
				1143	cur++;
				1144
				1145	size = memparse(cur, &tmp);
				1146	if (cur == tmp) {
				1147	pr_warn("Memory value expected\n");
				1148	return -EINVAL;
				1149	}
				1150	cur = tmp;
				1151	if (size >= system_ram) {
				1152	pr_warn("crashkernel: invalid size\n");
				1153	return -EINVAL;
				1154	}
				1155
				1156	/* match ? */
				1157	if (system_ram >= start && system_ram < end) {
				1158	*crash_size = size;
				1159	break;
				1160	}
				1161	} while (*cur++ == ',');
				1162
				1163	if (*crash_size > 0) {
				1164	while (cur && cur != ' ' && *cur != '@')
				1165	cur++;
				1166	if (*cur == '@') {
				1167	cur++;
				1168	*crash_base = memparse(cur, &tmp);
				1169	if (cur == tmp) {
				1170	pr_warn("Memory value expected after '@'\n");
				1171	return -EINVAL;
				1172	}
				1173	}
				1174	}
				1175
				1176	return 0;
				1177	}
				1178
				1179	/*
				1180	* That function parses "simple" (old) crashkernel command lines like
				1181	*
				1182	* crashkernel=size[@offset]
				1183	*
				1184	* It returns 0 on success and -EINVAL on failure.
				1185	*/
				1186	static int __init parse_crashkernel_simple(char *cmdline,
				1187	unsigned long long *crash_size,
				1188	unsigned long long *crash_base)
				1189	{
				1190	char *cur = cmdline;
				1191
				1192	*crash_size = memparse(cmdline, &cur);
				1193	if (cmdline == cur) {
				1194	pr_warn("crashkernel: memory value expected\n");
				1195	return -EINVAL;
				1196	}
				1197
				1198	if (*cur == '@')
				1199	*crash_base = memparse(cur+1, &cur);
				1200	else if (cur != ' ' && cur != '\0') {
Borislav Petkov	53b90c0	2015-10-19 11:17:47 +0200	[diff] [blame]	1201	pr_warn("crashkernel: unrecognized char: %c\n", *cur);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1202	return -EINVAL;
				1203	}
				1204
				1205	return 0;
				1206	}
				1207
/* Indices into suffix_tbl[]. */
#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
/*
 * Known "crashkernel=" suffixes.  The NULL entry terminates the table;
 * get_last_crashkernel() iterates until it reaches it.
 */
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};
				1216
				1217	/*
				1218	* That function parses "suffix" crashkernel command lines like
				1219	*
				1220	* crashkernel=size,[high\|low]
				1221	*
				1222	* It returns 0 on success and -EINVAL on failure.
				1223	*/
				1224	static int __init parse_crashkernel_suffix(char *cmdline,
				1225	unsigned long long *crash_size,
				1226	const char *suffix)
				1227	{
				1228	char *cur = cmdline;
				1229
				1230	*crash_size = memparse(cmdline, &cur);
				1231	if (cmdline == cur) {
				1232	pr_warn("crashkernel: memory value expected\n");
				1233	return -EINVAL;
				1234	}
				1235
				1236	/* check with suffix */
				1237	if (strncmp(cur, suffix, strlen(suffix))) {
Borislav Petkov	53b90c0	2015-10-19 11:17:47 +0200	[diff] [blame]	1238	pr_warn("crashkernel: unrecognized char: %c\n", *cur);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1239	return -EINVAL;
				1240	}
				1241	cur += strlen(suffix);
				1242	if (cur != ' ' && cur != '\0') {
Borislav Petkov	53b90c0	2015-10-19 11:17:47 +0200	[diff] [blame]	1243	pr_warn("crashkernel: unrecognized char: %c\n", *cur);
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1244	return -EINVAL;
				1245	}
				1246
				1247	return 0;
				1248	}
				1249
				1250	static __init char get_last_crashkernel(char cmdline,
				1251	const char *name,
				1252	const char *suffix)
				1253	{
				1254	char p = cmdline, ck_cmdline = NULL;
				1255
				1256	/* find crashkernel and use the last one if there are more */
				1257	p = strstr(p, name);
				1258	while (p) {
				1259	char *end_p = strchr(p, ' ');
				1260	char *q;
				1261
				1262	if (!end_p)
				1263	end_p = p + strlen(p);
				1264
				1265	if (!suffix) {
				1266	int i;
				1267
				1268	/* skip the one with any known suffix */
				1269	for (i = 0; suffix_tbl[i]; i++) {
				1270	q = end_p - strlen(suffix_tbl[i]);
				1271	if (!strncmp(q, suffix_tbl[i],
				1272	strlen(suffix_tbl[i])))
				1273	goto next;
				1274	}
				1275	ck_cmdline = p;
				1276	} else {
				1277	q = end_p - strlen(suffix);
				1278	if (!strncmp(q, suffix, strlen(suffix)))
				1279	ck_cmdline = p;
				1280	}
				1281	next:
				1282	p = strstr(p+1, name);
				1283	}
				1284
				1285	if (!ck_cmdline)
				1286	return NULL;
				1287
				1288	return ck_cmdline;
				1289	}
				1290
				1291	static int __init __parse_crashkernel(char *cmdline,
				1292	unsigned long long system_ram,
				1293	unsigned long long *crash_size,
				1294	unsigned long long *crash_base,
				1295	const char *name,
				1296	const char *suffix)
				1297	{
				1298	char first_colon, first_space;
				1299	char *ck_cmdline;
				1300
				1301	BUG_ON(!crash_size \|\| !crash_base);
				1302	*crash_size = 0;
				1303	*crash_base = 0;
				1304
				1305	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
				1306
				1307	if (!ck_cmdline)
				1308	return -EINVAL;
				1309
				1310	ck_cmdline += strlen(name);
				1311
				1312	if (suffix)
				1313	return parse_crashkernel_suffix(ck_cmdline, crash_size,
				1314	suffix);
				1315	/*
				1316	* if the commandline contains a ':', then that's the extended
				1317	* syntax -- if not, it must be the classic syntax
				1318	*/
				1319	first_colon = strchr(ck_cmdline, ':');
				1320	first_space = strchr(ck_cmdline, ' ');
				1321	if (first_colon && (!first_space \|\| first_colon < first_space))
				1322	return parse_crashkernel_mem(ck_cmdline, system_ram,
				1323	crash_size, crash_base);
				1324
				1325	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
				1326	}
				1327
/*
 * Entry point for "crashkernel=" command line parsing; intended to be
 * called from the arch-specific code.
 *
 * Parses the last "crashkernel=" option that carries no known suffix
 * (NULL suffix is passed to __parse_crashkernel()) and returns its result:
 * 0 on success, -EINVAL otherwise.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
					"crashkernel=", NULL);
}
				1340
/*
 * Parse the last "crashkernel=size,high" option.  Returns the result of
 * __parse_crashkernel() called with the ",high" suffix.
 */
int __init parse_crashkernel_high(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}
				1349
/*
 * Parse the last "crashkernel=size,low" option.  Returns the result of
 * __parse_crashkernel() called with the ",low" suffix.
 */
int __init parse_crashkernel_low(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}
				1358
				1359	static void update_vmcoreinfo_note(void)
				1360	{
				1361	u32 *buf = vmcoreinfo_note;
				1362
				1363	if (!vmcoreinfo_size)
				1364	return;
				1365	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
				1366	vmcoreinfo_size);
				1367	final_note(buf);
				1368	}
				1369
/*
 * Append a "CRASHTIME=<seconds>" line (from get_seconds()) to the
 * vmcoreinfo data and regenerate the vmcoreinfo note.  Called from
 * __crash_kexec() before the machine is shut down.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}
				1375
				1376	void vmcoreinfo_append_str(const char *fmt, ...)
				1377	{
				1378	va_list args;
				1379	char buf[0x50];
				1380	size_t r;
				1381
				1382	va_start(args, fmt);
				1383	r = vscnprintf(buf, sizeof(buf), fmt, args);
				1384	va_end(args);
				1385
				1386	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
				1387
				1388	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
				1389
				1390	vmcoreinfo_size += r;
				1391	}
				1392
/*
 * Empty default implementation, called at the end of
 * crash_save_vmcoreinfo_init().  Declared __weak so that architecture
 * code may override it.
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}
				1399
Russell King	dae2801	2016-08-02 14:06:00 -0700	[diff] [blame]	1400	phys_addr_t __weak paddr_vmcoreinfo_note(void)
Dave Young	2965faa	2015-09-09 15:38:55 -0700	[diff] [blame]	1401	{
				1402	return __pa((unsigned long)(char *)&vmcoreinfo_note);
				1403	}
				1404
/*
 * Boot-time initialisation of the vmcoreinfo data (run as a
 * subsys_initcall).
 *
 * Records the OS release, the page size and a fixed list of symbols,
 * structure sizes, member offsets, array lengths and constants through
 * the VMCOREINFO_* macros (defined outside this file; presumably each one
 * appends a line to vmcoreinfo_data -- confirm), lets the architecture
 * add its own entries, then builds the vmcoreinfo note.
 *
 * The statement order is kept as is; it presumably determines the order
 * of the emitted entries.
 *
 * Always returns 0.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_dtor);
	VMCOREINFO_OFFSET(page, compound_order);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	/* NOTE(review): presumably exports the printk log buffer layout -- confirm */
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif

	/* architecture-specific additions, then build the ELF note */
	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);
				1486
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 *
 * Returns -EBUSY if kexec_mutex cannot be taken (or, with
 * CONFIG_KEXEC_JUMP, if freezing processes fails), -EINVAL if no image is
 * loaded, or the error of the failing suspend step.  When execution comes
 * back here after machine_kexec() without any such failure, 0 is returned.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		/*
		 * Context-preserving jump: quiesce the system step by step.
		 * Each failure jumps to the label in the resume sequence
		 * below that undoes exactly the steps taken so far.
		 */
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end(). We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		/* Plain kexec: go through the restart preparation path. */
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case). However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		/*
		 * Resume sequence, in reverse order of the suspend steps.
		 * The labels are also the targets of the error paths above.
		 */
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
				1575
/*
 * Protection mechanism for crashkernel reserved memory after
 * the kdump kernel is loaded.
 *
 * Empty default implementation, declared __weak so that architecture
 * code may override it.
 */
void __weak arch_kexec_protect_crashkres(void)
{}
				1585
/*
 * Presumably the counterpart of arch_kexec_protect_crashkres() -- confirm
 * against the callers.  Empty default implementation, declared __weak so
 * that architecture code may override it.
 */
void __weak arch_kexec_unprotect_crashkres(void)
{}