blob: edbc012c2ebca1cff90c13d713d2e442e3ac52a1 [file] [log] [blame]
/*
 * Hypervisor-assisted dump
 *
 * Linas Vepstas, Manish Ahuja 2008
 * Copyright 2008 IBM Corp.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */
13
14#include <linux/init.h>
Manish Ahujad5a29c72008-03-22 10:39:17 +110015#include <linux/kobject.h>
Manish Ahuja6ac26c82008-03-22 10:37:08 +110016#include <linux/mm.h>
Manish Ahujad5a29c72008-03-22 10:39:17 +110017#include <linux/of.h>
Manish Ahuja6ac26c82008-03-22 10:37:08 +110018#include <linux/pfn.h>
19#include <linux/swap.h>
Manish Ahujad5a29c72008-03-22 10:39:17 +110020#include <linux/sysfs.h>
Manish Ahuja6ac26c82008-03-22 10:37:08 +110021
22#include <asm/page.h>
23#include <asm/phyp_dump.h>
24#include <asm/machdep.h>
25#include <asm/prom.h>
Manish Ahujad5a29c72008-03-22 10:39:17 +110026#include <asm/rtas.h>
Manish Ahuja6ac26c82008-03-22 10:37:08 +110027
28/* Variables, used to communicate data between early boot and late boot */
29static struct phyp_dump phyp_dump_vars;
30struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
31
Manish Ahuja2c4f4112008-03-22 10:43:15 +110032static int ibm_configure_kernel_dump;
33/* ------------------------------------------------- */
34/* RTAS interfaces to declare the dump regions */
35
36struct dump_section {
37 u32 dump_flags;
38 u16 source_type;
39 u16 error_flags;
40 u64 source_address;
41 u64 source_length;
42 u64 length_copied;
43 u64 destination_address;
44};
45
46struct phyp_dump_header {
47 u32 version;
48 u16 num_of_sections;
49 u16 status;
50
51 u32 first_offset_section;
52 u32 dump_disk_section;
53 u64 block_num_dd;
54 u64 num_of_blocks_dd;
55 u32 offset_dd;
56 u32 maxtime_to_auto;
57 /* No dump disk path string used */
58
59 struct dump_section cpu_data;
60 struct dump_section hpte_data;
61 struct dump_section kernel_data;
62};
63
64/* The dump header *must be* in low memory, so .bss it */
65static struct phyp_dump_header phdr;
66
67#define NUM_DUMP_SECTIONS 3
68#define DUMP_HEADER_VERSION 0x1
69#define DUMP_REQUEST_FLAG 0x1
70#define DUMP_SOURCE_CPU 0x0001
71#define DUMP_SOURCE_HPTE 0x0002
72#define DUMP_SOURCE_RMO 0x0011
Manish Ahujaa9c508d2008-03-22 10:45:22 +110073#define DUMP_ERROR_FLAG 0x2000
74#define DUMP_TRIGGERED 0x4000
75#define DUMP_PERFORMED 0x8000
76
Manish Ahuja2c4f4112008-03-22 10:43:15 +110077
78/**
79 * init_dump_header() - initialize the header declaring a dump
80 * Returns: length of dump save area.
81 *
82 * When the hypervisor saves crashed state, it needs to put
83 * it somewhere. The dump header tells the hypervisor where
84 * the data can be saved.
85 */
86static unsigned long init_dump_header(struct phyp_dump_header *ph)
87{
88 unsigned long addr_offset = 0;
89
90 /* Set up the dump header */
91 ph->version = DUMP_HEADER_VERSION;
92 ph->num_of_sections = NUM_DUMP_SECTIONS;
93 ph->status = 0;
94
95 ph->first_offset_section =
96 (u32)offsetof(struct phyp_dump_header, cpu_data);
97 ph->dump_disk_section = 0;
98 ph->block_num_dd = 0;
99 ph->num_of_blocks_dd = 0;
100 ph->offset_dd = 0;
101
102 ph->maxtime_to_auto = 0; /* disabled */
103
104 /* The first two sections are mandatory */
105 ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
106 ph->cpu_data.source_type = DUMP_SOURCE_CPU;
107 ph->cpu_data.source_address = 0;
108 ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
109 ph->cpu_data.destination_address = addr_offset;
110 addr_offset += phyp_dump_info->cpu_state_size;
111
112 ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
113 ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
114 ph->hpte_data.source_address = 0;
115 ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
116 ph->hpte_data.destination_address = addr_offset;
117 addr_offset += phyp_dump_info->hpte_region_size;
118
119 /* This section describes the low kernel region */
120 ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
121 ph->kernel_data.source_type = DUMP_SOURCE_RMO;
122 ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
123 ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
124 ph->kernel_data.destination_address = addr_offset;
125 addr_offset += ph->kernel_data.source_length;
126
127 return addr_offset;
128}
129
#ifdef DEBUG
/*
 * print_dump_section() - log every field of one dump section
 * @area: name used in the " <area> AREA " heading line
 * @tag:  prefix put in front of each field name
 * @ds:   section to print
 *
 * u64 fields are cast to unsigned long long so the format specifiers
 * are correct whichever way u64 is defined.
 */
static void print_dump_section(const char *area, const char *tag,
			       const struct dump_section *ds)
{
	printk(KERN_INFO " %s AREA \n", area);
	printk(KERN_INFO "%s dump_flags =%d\n", tag, ds->dump_flags);
	printk(KERN_INFO "%s source_type =%d\n", tag, ds->source_type);
	printk(KERN_INFO "%s error_flags =%d\n", tag, ds->error_flags);
	printk(KERN_INFO "%s source_address =%llx\n", tag,
	       (unsigned long long)ds->source_address);
	printk(KERN_INFO "%s source_length =%llx\n", tag,
	       (unsigned long long)ds->source_length);
	printk(KERN_INFO "%s length_copied =%llx\n", tag,
	       (unsigned long long)ds->length_copied);
}
#endif

/**
 * print_dump_header() - debug aid: log the contents of a dump header
 * @ph: header to print; may be NULL, in which case nothing is printed
 *
 * Compiles to an empty function unless DEBUG is defined.  The NULL
 * check matters because phyp_dump_setup() calls this with the result
 * of a device-tree property lookup that is NULL when no dump is
 * waiting.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
	if (!ph)
		return;

	printk(KERN_INFO "dump header:\n");
	printk(KERN_INFO "version = %d\n", ph->version);
	printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
	printk(KERN_INFO "Status = 0x%x\n", ph->status);

	/* No dump disk is used, so all of these should be 0 */
	printk(KERN_INFO "Offset to first section 0x%x\n",
	       ph->first_offset_section);
	printk(KERN_INFO "dump disk sections should be zero\n");
	printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
	printk(KERN_INFO "block num = %lld\n", (long long)ph->block_num_dd);
	printk(KERN_INFO "number of blocks = %lld\n",
	       (long long)ph->num_of_blocks_dd);
	printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
	printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

	/* The three sections: CPU state, HPTE region, kernel region */
	print_dump_section("CPU", "cpu", &ph->cpu_data);
	print_dump_section("HPTE", "HPTE", &ph->hpte_data);
	print_dump_section("SRSD", "SRSD", &ph->kernel_data);
#endif
}
184
Manish Ahuja7415d5e2008-03-22 11:40:53 +1100185static ssize_t show_phyp_dump_active(struct kobject *kobj,
186 struct kobj_attribute *attr, char *buf)
187{
188
189 /* create filesystem entry so kdump is phyp-dump aware */
190 return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
191}
192
193static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
194 show_phyp_dump_active,
195 NULL);
196
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100197static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
198{
199 int rc;
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100200
201 /* Add addr value if not initialized before */
202 if (ph->cpu_data.destination_address == 0) {
203 ph->cpu_data.destination_address += addr;
204 ph->hpte_data.destination_address += addr;
205 ph->kernel_data.destination_address += addr;
206 }
207
208 /* ToDo Invalidate kdump and free memory range. */
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100209
210 do {
211 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
212 1, ph, sizeof(struct phyp_dump_header));
213 } while (rtas_busy_delay(rc));
214
Manish Ahuja599c1aa2008-03-22 10:44:05 +1100215 if (rc) {
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100216 printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
217 "register\n", rc);
Manish Ahuja599c1aa2008-03-22 10:44:05 +1100218 print_dump_header(ph);
Manish Ahuja7415d5e2008-03-22 11:40:53 +1100219 return;
Manish Ahuja599c1aa2008-03-22 10:44:05 +1100220 }
Manish Ahuja7415d5e2008-03-22 11:40:53 +1100221
222 rc = sysfs_create_file(kernel_kobj, &pdl.attr);
223 if (rc)
224 printk(KERN_ERR "phyp-dump: unable to create sysfs"
225 " file (%d)\n", rc);
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100226}
227
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100228static
229void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
230{
231 int rc;
232
233 /* Add addr value if not initialized before */
234 if (ph->cpu_data.destination_address == 0) {
235 ph->cpu_data.destination_address += addr;
236 ph->hpte_data.destination_address += addr;
237 ph->kernel_data.destination_address += addr;
238 }
239
240 do {
241 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
242 2, ph, sizeof(struct phyp_dump_header));
243 } while (rtas_busy_delay(rc));
244
245 if (rc) {
246 printk(KERN_ERR "phyp-dump: unexpected error (%d) "
247 "on invalidate\n", rc);
248 print_dump_header(ph);
249 }
250}
251
Manish Ahujad5a29c72008-03-22 10:39:17 +1100252/* ------------------------------------------------- */
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100253/**
254 * release_memory_range -- release memory previously lmb_reserved
255 * @start_pfn: starting physical frame number
256 * @nr_pages: number of pages to free.
257 *
258 * This routine will release memory that had been previously
259 * lmb_reserved in early boot. The released memory becomes
260 * available for genreal use.
261 */
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100262static void release_memory_range(unsigned long start_pfn,
263 unsigned long nr_pages)
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100264{
265 struct page *rpage;
266 unsigned long end_pfn;
267 long i;
268
269 end_pfn = start_pfn + nr_pages;
270
271 for (i = start_pfn; i <= end_pfn; i++) {
272 rpage = pfn_to_page(i);
273 if (PageReserved(rpage)) {
274 ClearPageReserved(rpage);
275 init_page_count(rpage);
276 __free_page(rpage);
277 totalram_pages++;
278 }
279 }
280}
281
Manish Ahujafd35cff2008-03-22 10:47:04 +1100282/**
283 * track_freed_range -- Counts the range being freed.
284 * Once the counter goes to zero, it re-registers dump for
285 * future use.
286 */
287static void
288track_freed_range(unsigned long addr, unsigned long length)
289{
290 static unsigned long scratch_area_size, reserved_area_size;
291
292 if (addr < phyp_dump_info->init_reserve_start)
293 return;
294
295 if ((addr >= phyp_dump_info->init_reserve_start) &&
296 (addr <= phyp_dump_info->init_reserve_start +
297 phyp_dump_info->init_reserve_size))
298 reserved_area_size += length;
299
300 if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
301 (addr <= phyp_dump_info->reserved_scratch_addr +
302 phyp_dump_info->reserved_scratch_size))
303 scratch_area_size += length;
304
305 if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
306 (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
307
308 invalidate_last_dump(&phdr,
309 phyp_dump_info->reserved_scratch_addr);
310 register_dump_area(&phdr,
311 phyp_dump_info->reserved_scratch_addr);
312 }
313}
314
Manish Ahujad5a29c72008-03-22 10:39:17 +1100315/* ------------------------------------------------- */
316/**
317 * sysfs_release_region -- sysfs interface to release memory range.
318 *
319 * Usage:
320 * "echo <start addr> <length> > /sys/kernel/release_region"
321 *
322 * Example:
323 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
324 *
325 * will release 256MB starting at 1GB.
326 */
327static ssize_t store_release_region(struct kobject *kobj,
328 struct kobj_attribute *attr,
329 const char *buf, size_t count)
330{
331 unsigned long start_addr, length, end_addr;
332 unsigned long start_pfn, nr_pages;
333 ssize_t ret;
334
335 ret = sscanf(buf, "%lx %lx", &start_addr, &length);
336 if (ret != 2)
337 return -EINVAL;
338
Manish Ahujafd35cff2008-03-22 10:47:04 +1100339 track_freed_range(start_addr, length);
340
Manish Ahujad5a29c72008-03-22 10:39:17 +1100341 /* Range-check - don't free any reserved memory that
342 * wasn't reserved for phyp-dump */
343 if (start_addr < phyp_dump_info->init_reserve_start)
344 start_addr = phyp_dump_info->init_reserve_start;
345
346 end_addr = phyp_dump_info->init_reserve_start +
347 phyp_dump_info->init_reserve_size;
348 if (start_addr+length > end_addr)
349 length = end_addr - start_addr;
350
351 /* Release the region of memory assed in by user */
352 start_pfn = PFN_DOWN(start_addr);
353 nr_pages = PFN_DOWN(length);
354 release_memory_range(start_pfn, nr_pages);
355
356 return count;
357}
358
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100359static ssize_t show_release_region(struct kobject *kobj,
360 struct kobj_attribute *attr, char *buf)
361{
362 u64 second_addr_range;
363
364 /* total reserved size - start of scratch area */
365 second_addr_range = phyp_dump_info->init_reserve_size -
366 phyp_dump_info->reserved_scratch_size;
367 return sprintf(buf, "CPU:0x%lx-0x%lx: HPTE:0x%lx-0x%lx:"
368 " DUMP:0x%lx-0x%lx, 0x%lx-0x%lx:\n",
369 phdr.cpu_data.destination_address,
370 phdr.cpu_data.length_copied,
371 phdr.hpte_data.destination_address,
372 phdr.hpte_data.length_copied,
373 phdr.kernel_data.destination_address,
374 phdr.kernel_data.length_copied,
375 phyp_dump_info->init_reserve_start,
376 second_addr_range);
377}
378
Manish Ahujad5a29c72008-03-22 10:39:17 +1100379static struct kobj_attribute rr = __ATTR(release_region, 0600,
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100380 show_release_region,
381 store_release_region);
Manish Ahujad5a29c72008-03-22 10:39:17 +1100382
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100383static int __init phyp_dump_setup(void)
384{
Manish Ahujad5a29c72008-03-22 10:39:17 +1100385 struct device_node *rtas;
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100386 const struct phyp_dump_header *dump_header = NULL;
387 unsigned long dump_area_start;
388 unsigned long dump_area_length;
Manish Ahujad5a29c72008-03-22 10:39:17 +1100389 int header_len = 0;
390 int rc;
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100391
392 /* If no memory was reserved in early boot, there is nothing to do */
393 if (phyp_dump_info->init_reserve_size == 0)
394 return 0;
395
Manish Ahujad5a29c72008-03-22 10:39:17 +1100396 /* Return if phyp dump not supported */
397 if (!phyp_dump_info->phyp_dump_configured)
398 return -ENOSYS;
399
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100400 /* Is there dump data waiting for us? If there isn't,
401 * then register a new dump area, and release all of
402 * the rest of the reserved ram.
403 *
404 * The /rtas/ibm,kernel-dump rtas node is present only
405 * if there is dump data waiting for us.
406 */
Manish Ahujad5a29c72008-03-22 10:39:17 +1100407 rtas = of_find_node_by_path("/rtas");
408 if (rtas) {
409 dump_header = of_get_property(rtas, "ibm,kernel-dump",
410 &header_len);
411 of_node_put(rtas);
412 }
413
Manish Ahuja599c1aa2008-03-22 10:44:05 +1100414 print_dump_header(dump_header);
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100415 dump_area_length = init_dump_header(&phdr);
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100416 /* align down */
417 dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
418
419 if (dump_header == NULL) {
420 register_dump_area(&phdr, dump_area_start);
Manish Ahujad5a29c72008-03-22 10:39:17 +1100421 return 0;
422 }
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100423
Manish Ahujaa9c508d2008-03-22 10:45:22 +1100424 /* re-register the dump area, if old dump was invalid */
425 if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
426 invalidate_last_dump(&phdr, dump_area_start);
427 register_dump_area(&phdr, dump_area_start);
428 return 0;
429 }
430
431 if (dump_header) {
432 phyp_dump_info->reserved_scratch_addr =
433 dump_header->cpu_data.destination_address;
434 phyp_dump_info->reserved_scratch_size =
435 dump_header->cpu_data.source_length +
436 dump_header->hpte_data.source_length +
437 dump_header->kernel_data.source_length;
438 }
439
Manish Ahuja2c4f4112008-03-22 10:43:15 +1100440 /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
441 rc = sysfs_create_file(kernel_kobj, &rr.attr);
442 if (rc)
443 printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
444 rc);
445
446 /* ToDo: re-register the dump area, for next time. */
Manish Ahuja6ac26c82008-03-22 10:37:08 +1100447 return 0;
448}
449machine_subsys_initcall(pseries, phyp_dump_setup);
450
451int __init early_init_dt_scan_phyp_dump(unsigned long node,
452 const char *uname, int depth, void *data)
453{
454 const unsigned int *sizes;
455
456 phyp_dump_info->phyp_dump_configured = 0;
457 phyp_dump_info->phyp_dump_is_active = 0;
458
459 if (depth != 1 || strcmp(uname, "rtas") != 0)
460 return 0;
461
462 if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
463 phyp_dump_info->phyp_dump_configured++;
464
465 if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
466 phyp_dump_info->phyp_dump_is_active++;
467
468 sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
469 NULL);
470 if (!sizes)
471 return 0;
472
473 if (sizes[0] == 1)
474 phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
475
476 if (sizes[3] == 2)
477 phyp_dump_info->hpte_region_size =
478 *((unsigned long *)&sizes[4]);
479 return 1;
480}
Manish Ahuja654f5962008-03-22 11:38:59 +1100481
482/* Look for phyp_dump= cmdline option */
483static int __init early_phyp_dump_enabled(char *p)
484{
485 phyp_dump_info->phyp_dump_at_boot = 1;
486
487 if (!p)
488 return 0;
489
490 if (strncmp(p, "1", 1) == 0)
491 phyp_dump_info->phyp_dump_at_boot = 1;
492 else if (strncmp(p, "0", 1) == 0)
493 phyp_dump_info->phyp_dump_at_boot = 0;
494
495 return 0;
496}
497early_param("phyp_dump", early_phyp_dump_enabled);
498
Manish Ahuja37ddd5d2008-04-12 09:31:52 +1000499/* Look for phyp_dump_reserve_size= cmdline option */
500static int __init early_phyp_dump_reserve_size(char *p)
501{
502 if (p)
503 phyp_dump_info->reserve_bootvar = memparse(p, &p);
504
505 return 0;
506}
507early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);