| /* |
| * Hypervisor-assisted dump |
| * |
| * Linas Vepstas, Manish Ahuja 2008 |
| * Copyright 2008 IBM Corp. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| * |
| */ |
| |
| #include <linux/gfp.h> |
| #include <linux/init.h> |
| #include <linux/kobject.h> |
| #include <linux/mm.h> |
| #include <linux/of.h> |
| #include <linux/pfn.h> |
| #include <linux/swap.h> |
| #include <linux/sysfs.h> |
| |
| #include <asm/page.h> |
| #include <asm/phyp_dump.h> |
| #include <asm/machdep.h> |
| #include <asm/prom.h> |
| #include <asm/rtas.h> |
| |
| /* Variables, used to communicate data between early boot and late boot */ |
| static struct phyp_dump phyp_dump_vars; |
| struct phyp_dump *phyp_dump_info = &phyp_dump_vars; |
| |
| static int ibm_configure_kernel_dump; |
| /* ------------------------------------------------- */ |
| /* RTAS interfaces to declare the dump regions */ |
| |
| struct dump_section { |
| u32 dump_flags; |
| u16 source_type; |
| u16 error_flags; |
| u64 source_address; |
| u64 source_length; |
| u64 length_copied; |
| u64 destination_address; |
| }; |
| |
| struct phyp_dump_header { |
| u32 version; |
| u16 num_of_sections; |
| u16 status; |
| |
| u32 first_offset_section; |
| u32 dump_disk_section; |
| u64 block_num_dd; |
| u64 num_of_blocks_dd; |
| u32 offset_dd; |
| u32 maxtime_to_auto; |
| /* No dump disk path string used */ |
| |
| struct dump_section cpu_data; |
| struct dump_section hpte_data; |
| struct dump_section kernel_data; |
| }; |
| |
| /* The dump header *must be* in low memory, so .bss it */ |
| static struct phyp_dump_header phdr; |
| |
| #define NUM_DUMP_SECTIONS 3 |
| #define DUMP_HEADER_VERSION 0x1 |
| #define DUMP_REQUEST_FLAG 0x1 |
| #define DUMP_SOURCE_CPU 0x0001 |
| #define DUMP_SOURCE_HPTE 0x0002 |
| #define DUMP_SOURCE_RMO 0x0011 |
| #define DUMP_ERROR_FLAG 0x2000 |
| #define DUMP_TRIGGERED 0x4000 |
| #define DUMP_PERFORMED 0x8000 |
| |
| |
| /** |
| * init_dump_header() - initialize the header declaring a dump |
| * Returns: length of dump save area. |
| * |
| * When the hypervisor saves crashed state, it needs to put |
| * it somewhere. The dump header tells the hypervisor where |
| * the data can be saved. |
| */ |
| static unsigned long init_dump_header(struct phyp_dump_header *ph) |
| { |
| unsigned long addr_offset = 0; |
| |
| /* Set up the dump header */ |
| ph->version = DUMP_HEADER_VERSION; |
| ph->num_of_sections = NUM_DUMP_SECTIONS; |
| ph->status = 0; |
| |
| ph->first_offset_section = |
| (u32)offsetof(struct phyp_dump_header, cpu_data); |
| ph->dump_disk_section = 0; |
| ph->block_num_dd = 0; |
| ph->num_of_blocks_dd = 0; |
| ph->offset_dd = 0; |
| |
| ph->maxtime_to_auto = 0; /* disabled */ |
| |
| /* The first two sections are mandatory */ |
| ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG; |
| ph->cpu_data.source_type = DUMP_SOURCE_CPU; |
| ph->cpu_data.source_address = 0; |
| ph->cpu_data.source_length = phyp_dump_info->cpu_state_size; |
| ph->cpu_data.destination_address = addr_offset; |
| addr_offset += phyp_dump_info->cpu_state_size; |
| |
| ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG; |
| ph->hpte_data.source_type = DUMP_SOURCE_HPTE; |
| ph->hpte_data.source_address = 0; |
| ph->hpte_data.source_length = phyp_dump_info->hpte_region_size; |
| ph->hpte_data.destination_address = addr_offset; |
| addr_offset += phyp_dump_info->hpte_region_size; |
| |
| /* This section describes the low kernel region */ |
| ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG; |
| ph->kernel_data.source_type = DUMP_SOURCE_RMO; |
| ph->kernel_data.source_address = PHYP_DUMP_RMR_START; |
| ph->kernel_data.source_length = PHYP_DUMP_RMR_END; |
| ph->kernel_data.destination_address = addr_offset; |
| addr_offset += ph->kernel_data.source_length; |
| |
| return addr_offset; |
| } |
| |
| static void print_dump_header(const struct phyp_dump_header *ph) |
| { |
| #ifdef DEBUG |
| if (ph == NULL) |
| return; |
| |
| printk(KERN_INFO "dump header:\n"); |
| /* setup some ph->sections required */ |
| printk(KERN_INFO "version = %d\n", ph->version); |
| printk(KERN_INFO "Sections = %d\n", ph->num_of_sections); |
| printk(KERN_INFO "Status = 0x%x\n", ph->status); |
| |
| /* No ph->disk, so all should be set to 0 */ |
| printk(KERN_INFO "Offset to first section 0x%x\n", |
| ph->first_offset_section); |
| printk(KERN_INFO "dump disk sections should be zero\n"); |
| printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section); |
| printk(KERN_INFO "block num = %lld\n", ph->block_num_dd); |
| printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd); |
| printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd); |
| printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto); |
| |
| /*set cpu state and hpte states as well scratch pad area */ |
| printk(KERN_INFO " CPU AREA\n"); |
| printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags); |
| printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type); |
| printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags); |
| printk(KERN_INFO "cpu source_address =%llx\n", |
| ph->cpu_data.source_address); |
| printk(KERN_INFO "cpu source_length =%llx\n", |
| ph->cpu_data.source_length); |
| printk(KERN_INFO "cpu length_copied =%llx\n", |
| ph->cpu_data.length_copied); |
| |
| printk(KERN_INFO " HPTE AREA\n"); |
| printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags); |
| printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type); |
| printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags); |
| printk(KERN_INFO "HPTE source_address =%llx\n", |
| ph->hpte_data.source_address); |
| printk(KERN_INFO "HPTE source_length =%llx\n", |
| ph->hpte_data.source_length); |
| printk(KERN_INFO "HPTE length_copied =%llx\n", |
| ph->hpte_data.length_copied); |
| |
| printk(KERN_INFO " SRSD AREA\n"); |
| printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags); |
| printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type); |
| printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags); |
| printk(KERN_INFO "SRSD source_address =%llx\n", |
| ph->kernel_data.source_address); |
| printk(KERN_INFO "SRSD source_length =%llx\n", |
| ph->kernel_data.source_length); |
| printk(KERN_INFO "SRSD length_copied =%llx\n", |
| ph->kernel_data.length_copied); |
| #endif |
| } |
| |
| static ssize_t show_phyp_dump_active(struct kobject *kobj, |
| struct kobj_attribute *attr, char *buf) |
| { |
| |
| /* create filesystem entry so kdump is phyp-dump aware */ |
| return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot); |
| } |
| |
| static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, |
| show_phyp_dump_active, |
| NULL); |
| |
| static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr) |
| { |
| int rc; |
| |
| /* Add addr value if not initialized before */ |
| if (ph->cpu_data.destination_address == 0) { |
| ph->cpu_data.destination_address += addr; |
| ph->hpte_data.destination_address += addr; |
| ph->kernel_data.destination_address += addr; |
| } |
| |
| /* ToDo Invalidate kdump and free memory range. */ |
| |
| do { |
| rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, |
| 1, ph, sizeof(struct phyp_dump_header)); |
| } while (rtas_busy_delay(rc)); |
| |
| if (rc) { |
| printk(KERN_ERR "phyp-dump: unexpected error (%d) on " |
| "register\n", rc); |
| print_dump_header(ph); |
| return; |
| } |
| |
| rc = sysfs_create_file(kernel_kobj, &pdl.attr); |
| if (rc) |
| printk(KERN_ERR "phyp-dump: unable to create sysfs" |
| " file (%d)\n", rc); |
| } |
| |
| static |
| void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) |
| { |
| int rc; |
| |
| /* Add addr value if not initialized before */ |
| if (ph->cpu_data.destination_address == 0) { |
| ph->cpu_data.destination_address += addr; |
| ph->hpte_data.destination_address += addr; |
| ph->kernel_data.destination_address += addr; |
| } |
| |
| do { |
| rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL, |
| 2, ph, sizeof(struct phyp_dump_header)); |
| } while (rtas_busy_delay(rc)); |
| |
| if (rc) { |
| printk(KERN_ERR "phyp-dump: unexpected error (%d) " |
| "on invalidate\n", rc); |
| print_dump_header(ph); |
| } |
| } |
| |
| /* ------------------------------------------------- */ |
| /** |
| * release_memory_range -- release memory previously lmb_reserved |
| * @start_pfn: starting physical frame number |
| * @nr_pages: number of pages to free. |
| * |
| * This routine will release memory that had been previously |
| * lmb_reserved in early boot. The released memory becomes |
| * available for genreal use. |
| */ |
| static void release_memory_range(unsigned long start_pfn, |
| unsigned long nr_pages) |
| { |
| struct page *rpage; |
| unsigned long end_pfn; |
| long i; |
| |
| end_pfn = start_pfn + nr_pages; |
| |
| for (i = start_pfn; i <= end_pfn; i++) { |
| rpage = pfn_to_page(i); |
| if (PageReserved(rpage)) { |
| ClearPageReserved(rpage); |
| init_page_count(rpage); |
| __free_page(rpage); |
| totalram_pages++; |
| } |
| } |
| } |
| |
| /** |
| * track_freed_range -- Counts the range being freed. |
| * Once the counter goes to zero, it re-registers dump for |
| * future use. |
| */ |
| static void |
| track_freed_range(unsigned long addr, unsigned long length) |
| { |
| static unsigned long scratch_area_size, reserved_area_size; |
| |
| if (addr < phyp_dump_info->init_reserve_start) |
| return; |
| |
| if ((addr >= phyp_dump_info->init_reserve_start) && |
| (addr <= phyp_dump_info->init_reserve_start + |
| phyp_dump_info->init_reserve_size)) |
| reserved_area_size += length; |
| |
| if ((addr >= phyp_dump_info->reserved_scratch_addr) && |
| (addr <= phyp_dump_info->reserved_scratch_addr + |
| phyp_dump_info->reserved_scratch_size)) |
| scratch_area_size += length; |
| |
| if ((reserved_area_size == phyp_dump_info->init_reserve_size) && |
| (scratch_area_size == phyp_dump_info->reserved_scratch_size)) { |
| |
| invalidate_last_dump(&phdr, |
| phyp_dump_info->reserved_scratch_addr); |
| register_dump_area(&phdr, |
| phyp_dump_info->reserved_scratch_addr); |
| } |
| } |
| |
| /* ------------------------------------------------- */ |
| /** |
| * sysfs_release_region -- sysfs interface to release memory range. |
| * |
| * Usage: |
| * "echo <start addr> <length> > /sys/kernel/release_region" |
| * |
| * Example: |
| * "echo 0x40000000 0x10000000 > /sys/kernel/release_region" |
| * |
| * will release 256MB starting at 1GB. |
| */ |
| static ssize_t store_release_region(struct kobject *kobj, |
| struct kobj_attribute *attr, |
| const char *buf, size_t count) |
| { |
| unsigned long start_addr, length, end_addr; |
| unsigned long start_pfn, nr_pages; |
| ssize_t ret; |
| |
| ret = sscanf(buf, "%lx %lx", &start_addr, &length); |
| if (ret != 2) |
| return -EINVAL; |
| |
| track_freed_range(start_addr, length); |
| |
| /* Range-check - don't free any reserved memory that |
| * wasn't reserved for phyp-dump */ |
| if (start_addr < phyp_dump_info->init_reserve_start) |
| start_addr = phyp_dump_info->init_reserve_start; |
| |
| end_addr = phyp_dump_info->init_reserve_start + |
| phyp_dump_info->init_reserve_size; |
| if (start_addr+length > end_addr) |
| length = end_addr - start_addr; |
| |
| /* Release the region of memory assed in by user */ |
| start_pfn = PFN_DOWN(start_addr); |
| nr_pages = PFN_DOWN(length); |
| release_memory_range(start_pfn, nr_pages); |
| |
| return count; |
| } |
| |
| static ssize_t show_release_region(struct kobject *kobj, |
| struct kobj_attribute *attr, char *buf) |
| { |
| u64 second_addr_range; |
| |
| /* total reserved size - start of scratch area */ |
| second_addr_range = phyp_dump_info->init_reserve_size - |
| phyp_dump_info->reserved_scratch_size; |
| return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:" |
| " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n", |
| phdr.cpu_data.destination_address, |
| phdr.cpu_data.length_copied, |
| phdr.hpte_data.destination_address, |
| phdr.hpte_data.length_copied, |
| phdr.kernel_data.destination_address, |
| phdr.kernel_data.length_copied, |
| phyp_dump_info->init_reserve_start, |
| second_addr_range); |
| } |
| |
| static struct kobj_attribute rr = __ATTR(release_region, 0600, |
| show_release_region, |
| store_release_region); |
| |
| static int __init phyp_dump_setup(void) |
| { |
| struct device_node *rtas; |
| const struct phyp_dump_header *dump_header = NULL; |
| unsigned long dump_area_start; |
| unsigned long dump_area_length; |
| int header_len = 0; |
| int rc; |
| |
| /* If no memory was reserved in early boot, there is nothing to do */ |
| if (phyp_dump_info->init_reserve_size == 0) |
| return 0; |
| |
| /* Return if phyp dump not supported */ |
| if (!phyp_dump_info->phyp_dump_configured) |
| return -ENOSYS; |
| |
| /* Is there dump data waiting for us? If there isn't, |
| * then register a new dump area, and release all of |
| * the rest of the reserved ram. |
| * |
| * The /rtas/ibm,kernel-dump rtas node is present only |
| * if there is dump data waiting for us. |
| */ |
| rtas = of_find_node_by_path("/rtas"); |
| if (rtas) { |
| dump_header = of_get_property(rtas, "ibm,kernel-dump", |
| &header_len); |
| of_node_put(rtas); |
| } |
| |
| ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump"); |
| |
| print_dump_header(dump_header); |
| dump_area_length = init_dump_header(&phdr); |
| /* align down */ |
| dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK; |
| |
| if (dump_header == NULL) { |
| register_dump_area(&phdr, dump_area_start); |
| return 0; |
| } |
| |
| /* re-register the dump area, if old dump was invalid */ |
| if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) { |
| invalidate_last_dump(&phdr, dump_area_start); |
| register_dump_area(&phdr, dump_area_start); |
| return 0; |
| } |
| |
| if (dump_header) { |
| phyp_dump_info->reserved_scratch_addr = |
| dump_header->cpu_data.destination_address; |
| phyp_dump_info->reserved_scratch_size = |
| dump_header->cpu_data.source_length + |
| dump_header->hpte_data.source_length + |
| dump_header->kernel_data.source_length; |
| } |
| |
| /* Should we create a dump_subsys, analogous to s390/ipl.c ? */ |
| rc = sysfs_create_file(kernel_kobj, &rr.attr); |
| if (rc) |
| printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n", |
| rc); |
| |
| /* ToDo: re-register the dump area, for next time. */ |
| return 0; |
| } |
| machine_subsys_initcall(pseries, phyp_dump_setup); |
| |
| int __init early_init_dt_scan_phyp_dump(unsigned long node, |
| const char *uname, int depth, void *data) |
| { |
| const unsigned int *sizes; |
| |
| phyp_dump_info->phyp_dump_configured = 0; |
| phyp_dump_info->phyp_dump_is_active = 0; |
| |
| if (depth != 1 || strcmp(uname, "rtas") != 0) |
| return 0; |
| |
| if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL)) |
| phyp_dump_info->phyp_dump_configured++; |
| |
| if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL)) |
| phyp_dump_info->phyp_dump_is_active++; |
| |
| sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", |
| NULL); |
| if (!sizes) |
| return 0; |
| |
| if (sizes[0] == 1) |
| phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]); |
| |
| if (sizes[3] == 2) |
| phyp_dump_info->hpte_region_size = |
| *((unsigned long *)&sizes[4]); |
| return 1; |
| } |
| |
| /* Look for phyp_dump= cmdline option */ |
| static int __init early_phyp_dump_enabled(char *p) |
| { |
| phyp_dump_info->phyp_dump_at_boot = 1; |
| |
| if (!p) |
| return 0; |
| |
| if (strncmp(p, "1", 1) == 0) |
| phyp_dump_info->phyp_dump_at_boot = 1; |
| else if (strncmp(p, "0", 1) == 0) |
| phyp_dump_info->phyp_dump_at_boot = 0; |
| |
| return 0; |
| } |
| early_param("phyp_dump", early_phyp_dump_enabled); |
| |
| /* Look for phyp_dump_reserve_size= cmdline option */ |
| static int __init early_phyp_dump_reserve_size(char *p) |
| { |
| if (p) |
| phyp_dump_info->reserve_bootvar = memparse(p, &p); |
| |
| return 0; |
| } |
| early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size); |