blob: 8a04216e8b4d37856bec43d273f24b45cadc138e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/ptrace.h>
27#include <linux/slab.h>
28#include <linux/shm.h>
29#include <linux/personality.h>
30#include <linux/elfcore.h>
31#include <linux/init.h>
32#include <linux/highuid.h>
33#include <linux/smp.h>
34#include <linux/smp_lock.h>
35#include <linux/compiler.h>
36#include <linux/highmem.h>
37#include <linux/pagemap.h>
38#include <linux/security.h>
39#include <linux/syscalls.h>
40#include <linux/random.h>
41
42#include <asm/uaccess.h>
43#include <asm/param.h>
44#include <asm/page.h>
45
46#include <linux/elf.h>
47
48static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs);
49static int load_elf_library(struct file*);
50static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
51extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
52
53#ifndef elf_addr_t
54#define elf_addr_t unsigned long
55#endif
56
57/*
58 * If we don't support core dumping, then supply a NULL so we
59 * don't even try.
60 */
Matt Mackall708e9a72006-01-08 01:05:25 -080061#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -070062static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
63#else
64#define elf_core_dump NULL
65#endif
66
67#if ELF_EXEC_PAGESIZE > PAGE_SIZE
68# define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
69#else
70# define ELF_MIN_ALIGN PAGE_SIZE
71#endif
72
73#ifndef ELF_CORE_EFLAGS
74#define ELF_CORE_EFLAGS 0
75#endif
76
/*
 * Page arithmetic on ELF_MIN_ALIGN-sized pages:
 *   ELF_PAGESTART  - round _v down to the start of its page
 *   ELF_PAGEOFFSET - offset of _v inside its page
 *   ELF_PAGEALIGN  - round _v up to the next page boundary
 *
 * The masks are widened to unsigned long before being inverted so that an
 * ELF_MIN_ALIGN of a narrower unsigned type cannot clear the upper bits of
 * a wider address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
80
81static struct linux_binfmt elf_format = {
82 .module = THIS_MODULE,
83 .load_binary = load_elf_binary,
84 .load_shlib = load_elf_library,
85 .core_dump = elf_core_dump,
86 .min_coredump = ELF_EXEC_PAGESIZE
87};
88
/*
 * True when x cannot be a user-space address.  TASK_SIZE itself is the
 * first address past the user range, so it is rejected as well.  Negative
 * errno values returned through an unsigned long are far above TASK_SIZE
 * and are therefore caught by the same test.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
90
91static int set_brk(unsigned long start, unsigned long end)
92{
93 start = ELF_PAGEALIGN(start);
94 end = ELF_PAGEALIGN(end);
95 if (end > start) {
96 unsigned long addr;
97 down_write(&current->mm->mmap_sem);
98 addr = do_brk(start, end - start);
99 up_write(&current->mm->mmap_sem);
100 if (BAD_ADDR(addr))
101 return addr;
102 }
103 current->mm->start_brk = current->mm->brk = end;
104 return 0;
105}
106
107
108/* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would
110 contain the junk from the file that should not
111 be in memory */
112
113
114static int padzero(unsigned long elf_bss)
115{
116 unsigned long nbyte;
117
118 nbyte = ELF_PAGEOFFSET(elf_bss);
119 if (nbyte) {
120 nbyte = ELF_MIN_ALIGN - nbyte;
121 if (clear_user((void __user *) elf_bss, nbyte))
122 return -EFAULT;
123 }
124 return 0;
125}
126
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction the stack grows
 *   STACK_ROUND(sp, items) - the same position, rounded to 16 bytes
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on sp (modifies sp) and
 *                            yield the start of the reserved area
 *
 * Every argument is parenthesised so compound expressions expand as a unit.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
	(sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
139
140static int
141create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
142 int interp_aout, unsigned long load_addr,
143 unsigned long interp_load_addr)
144{
145 unsigned long p = bprm->p;
146 int argc = bprm->argc;
147 int envc = bprm->envc;
148 elf_addr_t __user *argv;
149 elf_addr_t __user *envp;
150 elf_addr_t __user *sp;
151 elf_addr_t __user *u_platform;
152 const char *k_platform = ELF_PLATFORM;
153 int items;
154 elf_addr_t *elf_info;
155 int ei_index = 0;
156 struct task_struct *tsk = current;
157
158 /*
159 * If this architecture has a platform capability string, copy it
160 * to userspace. In some cases (Sparc), this info is impossible
161 * for userspace to get any other way, in others (i386) it is
162 * merely difficult.
163 */
164
165 u_platform = NULL;
166 if (k_platform) {
167 size_t len = strlen(k_platform) + 1;
168
169 /*
170 * In some cases (e.g. Hyper-Threading), we want to avoid L1
171 * evictions by the processes running on the same package. One
172 * thing we can do is to shuffle the initial stack for them.
173 */
174
175 p = arch_align_stack(p);
176
177 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
178 if (__copy_to_user(u_platform, k_platform, len))
179 return -EFAULT;
180 }
181
182 /* Create the ELF interpreter info */
183 elf_info = (elf_addr_t *) current->mm->saved_auxv;
184#define NEW_AUX_ENT(id, val) \
185 do { elf_info[ei_index++] = id; elf_info[ei_index++] = val; } while (0)
186
187#ifdef ARCH_DLINFO
188 /*
189 * ARCH_DLINFO must come first so PPC can do its special alignment of
190 * AUXV.
191 */
192 ARCH_DLINFO;
193#endif
194 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
195 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 NEW_AUX_ENT(AT_PHENT, sizeof (struct elf_phdr));
199 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 NEW_AUX_ENT(AT_FLAGS, 0);
202 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 NEW_AUX_ENT(AT_UID, (elf_addr_t) tsk->uid);
204 NEW_AUX_ENT(AT_EUID, (elf_addr_t) tsk->euid);
205 NEW_AUX_ENT(AT_GID, (elf_addr_t) tsk->gid);
206 NEW_AUX_ENT(AT_EGID, (elf_addr_t) tsk->egid);
207 NEW_AUX_ENT(AT_SECURE, (elf_addr_t) security_bprm_secureexec(bprm));
208 if (k_platform) {
209 NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform);
210 }
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, (elf_addr_t) bprm->interp_data);
213 }
214#undef NEW_AUX_ENT
215 /* AT_NULL is zero; clear the rest too */
216 memset(&elf_info[ei_index], 0,
217 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
218
219 /* And advance past the AT_NULL entry. */
220 ei_index += 2;
221
222 sp = STACK_ADD(p, ei_index);
223
224 items = (argc + 1) + (envc + 1);
225 if (interp_aout) {
226 items += 3; /* a.out interpreters require argv & envp too */
227 } else {
228 items += 1; /* ELF interpreters only put argc on the stack */
229 }
230 bprm->p = STACK_ROUND(sp, items);
231
232 /* Point sp at the lowest address on the stack */
233#ifdef CONFIG_STACK_GROWSUP
234 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
235 bprm->exec = (unsigned long) sp; /* XXX: PARISC HACK */
236#else
237 sp = (elf_addr_t __user *)bprm->p;
238#endif
239
240 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
241 if (__put_user(argc, sp++))
242 return -EFAULT;
243 if (interp_aout) {
244 argv = sp + 2;
245 envp = argv + argc + 1;
246 __put_user((elf_addr_t)(unsigned long)argv, sp++);
247 __put_user((elf_addr_t)(unsigned long)envp, sp++);
248 } else {
249 argv = sp;
250 envp = argv + argc + 1;
251 }
252
253 /* Populate argv and envp */
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -0700254 p = current->mm->arg_end = current->mm->arg_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 while (argc-- > 0) {
256 size_t len;
257 __put_user((elf_addr_t)p, argv++);
258 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
259 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
260 return 0;
261 p += len;
262 }
263 if (__put_user(0, argv))
264 return -EFAULT;
265 current->mm->arg_end = current->mm->env_start = p;
266 while (envc-- > 0) {
267 size_t len;
268 __put_user((elf_addr_t)p, envp++);
269 len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
270 if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
271 return 0;
272 p += len;
273 }
274 if (__put_user(0, envp))
275 return -EFAULT;
276 current->mm->env_end = p;
277
278 /* Put the elf_info on the stack in the right place. */
279 sp = (elf_addr_t __user *)envp + 1;
280 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
281 return -EFAULT;
282 return 0;
283}
284
285#ifndef elf_map
286
287static unsigned long elf_map(struct file *filep, unsigned long addr,
288 struct elf_phdr *eppnt, int prot, int type)
289{
290 unsigned long map_addr;
David Gibsondda6ebd2006-01-08 01:03:35 -0800291 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292
293 down_write(&current->mm->mmap_sem);
David Gibsondda6ebd2006-01-08 01:03:35 -0800294 /* mmap() will return -EINVAL if given a zero size, but a
295 * segment with zero filesize is perfectly valid */
296 if (eppnt->p_filesz + pageoffset)
297 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
298 eppnt->p_filesz + pageoffset, prot, type,
299 eppnt->p_offset - pageoffset);
300 else
301 map_addr = ELF_PAGESTART(addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 up_write(&current->mm->mmap_sem);
303 return(map_addr);
304}
305
306#endif /* !elf_map */
307
308/* This is much more generalized than the library routine read function,
309 so we keep this separate. Technically the library read function
310 is only provided so that we can read a.out libraries that have
311 an ELF header */
312
313static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
314 struct file * interpreter,
315 unsigned long *interp_load_addr)
316{
317 struct elf_phdr *elf_phdata;
318 struct elf_phdr *eppnt;
319 unsigned long load_addr = 0;
320 int load_addr_set = 0;
321 unsigned long last_bss = 0, elf_bss = 0;
322 unsigned long error = ~0UL;
323 int retval, i, size;
324
325 /* First of all, some simple consistency checks */
326 if (interp_elf_ex->e_type != ET_EXEC &&
327 interp_elf_ex->e_type != ET_DYN)
328 goto out;
329 if (!elf_check_arch(interp_elf_ex))
330 goto out;
331 if (!interpreter->f_op || !interpreter->f_op->mmap)
332 goto out;
333
334 /*
335 * If the size of this structure has changed, then punt, since
336 * we will be doing the wrong thing.
337 */
338 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
339 goto out;
340 if (interp_elf_ex->e_phnum < 1 ||
341 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
342 goto out;
343
344 /* Now read in all of the header information */
345
346 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
347 if (size > ELF_MIN_ALIGN)
348 goto out;
349 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
350 if (!elf_phdata)
351 goto out;
352
353 retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
354 error = -EIO;
355 if (retval != size) {
356 if (retval < 0)
357 error = retval;
358 goto out_close;
359 }
360
361 eppnt = elf_phdata;
362 for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
363 if (eppnt->p_type == PT_LOAD) {
364 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
365 int elf_prot = 0;
366 unsigned long vaddr = 0;
367 unsigned long k, map_addr;
368
369 if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
370 if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
371 if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
372 vaddr = eppnt->p_vaddr;
373 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
374 elf_type |= MAP_FIXED;
375
376 map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
377 error = map_addr;
378 if (BAD_ADDR(map_addr))
379 goto out_close;
380
381 if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
382 load_addr = map_addr - ELF_PAGESTART(vaddr);
383 load_addr_set = 1;
384 }
385
386 /*
387 * Check to see if the section's size will overflow the
388 * allowed task size. Note that p_filesz must always be
389 * <= p_memsize so it is only necessary to check p_memsz.
390 */
391 k = load_addr + eppnt->p_vaddr;
392 if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
393 eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
394 error = -ENOMEM;
395 goto out_close;
396 }
397
398 /*
399 * Find the end of the file mapping for this phdr, and keep
400 * track of the largest address we see for this.
401 */
402 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
403 if (k > elf_bss)
404 elf_bss = k;
405
406 /*
407 * Do the same thing for the memory mapping - between
408 * elf_bss and last_bss is the bss section.
409 */
410 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
411 if (k > last_bss)
412 last_bss = k;
413 }
414 }
415
416 /*
417 * Now fill out the bss section. First pad the last page up
418 * to the page boundary, and then perform a mmap to make sure
419 * that there are zero-mapped pages up to and including the
420 * last bss page.
421 */
422 if (padzero(elf_bss)) {
423 error = -EFAULT;
424 goto out_close;
425 }
426
427 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
428
429 /* Map the last of the bss segment */
430 if (last_bss > elf_bss) {
431 down_write(&current->mm->mmap_sem);
432 error = do_brk(elf_bss, last_bss - elf_bss);
433 up_write(&current->mm->mmap_sem);
434 if (BAD_ADDR(error))
435 goto out_close;
436 }
437
438 *interp_load_addr = load_addr;
439 error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
440
441out_close:
442 kfree(elf_phdata);
443out:
444 return error;
445}
446
447static unsigned long load_aout_interp(struct exec * interp_ex,
448 struct file * interpreter)
449{
450 unsigned long text_data, elf_entry = ~0UL;
451 char __user * addr;
452 loff_t offset;
453
454 current->mm->end_code = interp_ex->a_text;
455 text_data = interp_ex->a_text + interp_ex->a_data;
456 current->mm->end_data = text_data;
457 current->mm->brk = interp_ex->a_bss + text_data;
458
459 switch (N_MAGIC(*interp_ex)) {
460 case OMAGIC:
461 offset = 32;
462 addr = (char __user *)0;
463 break;
464 case ZMAGIC:
465 case QMAGIC:
466 offset = N_TXTOFF(*interp_ex);
467 addr = (char __user *) N_TXTADDR(*interp_ex);
468 break;
469 default:
470 goto out;
471 }
472
473 down_write(&current->mm->mmap_sem);
474 do_brk(0, text_data);
475 up_write(&current->mm->mmap_sem);
476 if (!interpreter->f_op || !interpreter->f_op->read)
477 goto out;
478 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
479 goto out;
480 flush_icache_range((unsigned long)addr,
481 (unsigned long)addr + text_data);
482
483
484 down_write(&current->mm->mmap_sem);
485 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
486 interp_ex->a_bss);
487 up_write(&current->mm->mmap_sem);
488 elf_entry = interp_ex->a_entry;
489
490out:
491 return elf_entry;
492}
493
494/*
495 * These are the functions used to load ELF style executables and shared
496 * libraries. There is no binary dependent code anywhere else.
497 */
498
499#define INTERPRETER_NONE 0
500#define INTERPRETER_AOUT 1
501#define INTERPRETER_ELF 2
502
Andi Kleen913bd902006-03-25 16:29:09 +0100503#ifndef STACK_RND_MASK
504#define STACK_RND_MASK 0x7ff /* with 4K pages 8MB of VA */
505#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
507static unsigned long randomize_stack_top(unsigned long stack_top)
508{
509 unsigned int random_variable = 0;
510
Andi Kleen913bd902006-03-25 16:29:09 +0100511 if (current->flags & PF_RANDOMIZE) {
512 random_variable = get_random_int() & STACK_RND_MASK;
513 random_variable <<= PAGE_SHIFT;
514 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100516 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517#else
Andi Kleen913bd902006-03-25 16:29:09 +0100518 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519#endif
520}
521
522static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
523{
524 struct file *interpreter = NULL; /* to shut gcc up */
525 unsigned long load_addr = 0, load_bias = 0;
526 int load_addr_set = 0;
527 char * elf_interpreter = NULL;
528 unsigned int interpreter_type = INTERPRETER_NONE;
529 unsigned char ibcs2_interpreter = 0;
530 unsigned long error;
531 struct elf_phdr * elf_ppnt, *elf_phdata;
532 unsigned long elf_bss, elf_brk;
533 int elf_exec_fileno;
534 int retval, i;
535 unsigned int size;
536 unsigned long elf_entry, interp_load_addr = 0;
537 unsigned long start_code, end_code, start_data, end_data;
538 unsigned long reloc_func_desc = 0;
539 char passed_fileno[6];
540 struct files_struct *files;
541 int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
542 unsigned long def_flags = 0;
543 struct {
544 struct elfhdr elf_ex;
545 struct elfhdr interp_elf_ex;
546 struct exec interp_ex;
547 } *loc;
548
549 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
550 if (!loc) {
551 retval = -ENOMEM;
552 goto out_ret;
553 }
554
555 /* Get the exec-header */
556 loc->elf_ex = *((struct elfhdr *) bprm->buf);
557
558 retval = -ENOEXEC;
559 /* First of all, some simple consistency checks */
560 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
561 goto out;
562
563 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
564 goto out;
565 if (!elf_check_arch(&loc->elf_ex))
566 goto out;
567 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
568 goto out;
569
570 /* Now read in all of the header information */
571
572 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
573 goto out;
574 if (loc->elf_ex.e_phnum < 1 ||
575 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
576 goto out;
577 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
578 retval = -ENOMEM;
579 elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
580 if (!elf_phdata)
581 goto out;
582
583 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size);
584 if (retval != size) {
585 if (retval >= 0)
586 retval = -EIO;
587 goto out_free_ph;
588 }
589
590 files = current->files; /* Refcounted so ok */
591 retval = unshare_files();
592 if (retval < 0)
593 goto out_free_ph;
594 if (files == current->files) {
595 put_files_struct(files);
596 files = NULL;
597 }
598
599 /* exec will make our files private anyway, but for the a.out
600 loader stuff we need to do it earlier */
601
602 retval = get_unused_fd();
603 if (retval < 0)
604 goto out_free_fh;
605 get_file(bprm->file);
606 fd_install(elf_exec_fileno = retval, bprm->file);
607
608 elf_ppnt = elf_phdata;
609 elf_bss = 0;
610 elf_brk = 0;
611
612 start_code = ~0UL;
613 end_code = 0;
614 start_data = 0;
615 end_data = 0;
616
617 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
618 if (elf_ppnt->p_type == PT_INTERP) {
619 /* This is the program interpreter used for
620 * shared libraries - for now assume that this
621 * is an a.out format binary
622 */
623
624 retval = -ENOEXEC;
625 if (elf_ppnt->p_filesz > PATH_MAX ||
626 elf_ppnt->p_filesz < 2)
627 goto out_free_file;
628
629 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800630 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 GFP_KERNEL);
632 if (!elf_interpreter)
633 goto out_free_file;
634
635 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
636 elf_interpreter,
637 elf_ppnt->p_filesz);
638 if (retval != elf_ppnt->p_filesz) {
639 if (retval >= 0)
640 retval = -EIO;
641 goto out_free_interp;
642 }
643 /* make sure path is NULL terminated */
644 retval = -ENOEXEC;
645 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
646 goto out_free_interp;
647
648 /* If the program interpreter is one of these two,
649 * then assume an iBCS2 image. Otherwise assume
650 * a native linux image.
651 */
652 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
653 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
654 ibcs2_interpreter = 1;
655
656 /*
657 * The early SET_PERSONALITY here is so that the lookup
658 * for the interpreter happens in the namespace of the
659 * to-be-execed image. SET_PERSONALITY can select an
660 * alternate root.
661 *
662 * However, SET_PERSONALITY is NOT allowed to switch
663 * this task into the new images's memory mapping
664 * policy - that is, TASK_SIZE must still evaluate to
665 * that which is appropriate to the execing application.
666 * This is because exit_mmap() needs to have TASK_SIZE
667 * evaluate to the size of the old image.
668 *
669 * So if (say) a 64-bit application is execing a 32-bit
670 * application it is the architecture's responsibility
671 * to defer changing the value of TASK_SIZE until the
672 * switch really is going to happen - do this in
673 * flush_thread(). - akpm
674 */
675 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
676
677 interpreter = open_exec(elf_interpreter);
678 retval = PTR_ERR(interpreter);
679 if (IS_ERR(interpreter))
680 goto out_free_interp;
681 retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
682 if (retval != BINPRM_BUF_SIZE) {
683 if (retval >= 0)
684 retval = -EIO;
685 goto out_free_dentry;
686 }
687
688 /* Get the exec headers */
689 loc->interp_ex = *((struct exec *) bprm->buf);
690 loc->interp_elf_ex = *((struct elfhdr *) bprm->buf);
691 break;
692 }
693 elf_ppnt++;
694 }
695
696 elf_ppnt = elf_phdata;
697 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
698 if (elf_ppnt->p_type == PT_GNU_STACK) {
699 if (elf_ppnt->p_flags & PF_X)
700 executable_stack = EXSTACK_ENABLE_X;
701 else
702 executable_stack = EXSTACK_DISABLE_X;
703 break;
704 }
705 have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
706
707 /* Some simple consistency checks for the interpreter */
708 if (elf_interpreter) {
709 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
710
711 /* Now figure out which format our binary is */
712 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
713 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
714 (N_MAGIC(loc->interp_ex) != QMAGIC))
715 interpreter_type = INTERPRETER_ELF;
716
717 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
718 interpreter_type &= ~INTERPRETER_ELF;
719
720 retval = -ELIBBAD;
721 if (!interpreter_type)
722 goto out_free_dentry;
723
724 /* Make sure only one type was selected */
725 if ((interpreter_type & INTERPRETER_ELF) &&
726 interpreter_type != INTERPRETER_ELF) {
727 // FIXME - ratelimit this before re-enabling
728 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
729 interpreter_type = INTERPRETER_ELF;
730 }
731 /* Verify the interpreter has a valid arch */
732 if ((interpreter_type == INTERPRETER_ELF) &&
733 !elf_check_arch(&loc->interp_elf_ex))
734 goto out_free_dentry;
735 } else {
736 /* Executables without an interpreter also need a personality */
737 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
738 }
739
740 /* OK, we are done with that, now set up the arg stuff,
741 and then start this sucker up */
742
743 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
744 char *passed_p = passed_fileno;
745 sprintf(passed_fileno, "%d", elf_exec_fileno);
746
747 if (elf_interpreter) {
748 retval = copy_strings_kernel(1, &passed_p, bprm);
749 if (retval)
750 goto out_free_dentry;
751 bprm->argc++;
752 }
753 }
754
755 /* Flush all traces of the currently running executable */
756 retval = flush_old_exec(bprm);
757 if (retval)
758 goto out_free_dentry;
759
760 /* Discard our unneeded old files struct */
761 if (files) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 put_files_struct(files);
763 files = NULL;
764 }
765
766 /* OK, This is the point of no return */
767 current->mm->start_data = 0;
768 current->mm->end_data = 0;
769 current->mm->end_code = 0;
770 current->mm->mmap = NULL;
771 current->flags &= ~PF_FORKNOEXEC;
772 current->mm->def_flags = def_flags;
773
774 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
775 may depend on the personality. */
776 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
777 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
778 current->personality |= READ_IMPLIES_EXEC;
779
780 if ( !(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
781 current->flags |= PF_RANDOMIZE;
782 arch_pick_mmap_layout(current->mm);
783
784 /* Do this so that we can load the interpreter, if need be. We will
785 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700787 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
789 executable_stack);
790 if (retval < 0) {
791 send_sig(SIGKILL, current, 0);
792 goto out_free_dentry;
793 }
794
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 current->mm->start_stack = bprm->p;
796
797 /* Now we do a little grungy work by mmaping the ELF image into
798 the correct location in memory. At this point, we assume that
799 the image should be loaded at fixed address, not at a variable
800 address. */
801
802 for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
803 int elf_prot = 0, elf_flags;
804 unsigned long k, vaddr;
805
806 if (elf_ppnt->p_type != PT_LOAD)
807 continue;
808
809 if (unlikely (elf_brk > elf_bss)) {
810 unsigned long nbyte;
811
812 /* There was a PT_LOAD segment with p_memsz > p_filesz
813 before this one. Map anonymous pages, if needed,
814 and clear the area. */
815 retval = set_brk (elf_bss + load_bias,
816 elf_brk + load_bias);
817 if (retval) {
818 send_sig(SIGKILL, current, 0);
819 goto out_free_dentry;
820 }
821 nbyte = ELF_PAGEOFFSET(elf_bss);
822 if (nbyte) {
823 nbyte = ELF_MIN_ALIGN - nbyte;
824 if (nbyte > elf_brk - elf_bss)
825 nbyte = elf_brk - elf_bss;
826 if (clear_user((void __user *)elf_bss +
827 load_bias, nbyte)) {
828 /*
829 * This bss-zeroing can fail if the ELF
830 * file specifies odd protections. So
831 * we don't check the return value
832 */
833 }
834 }
835 }
836
837 if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ;
838 if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
839 if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
840
841 elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
842
843 vaddr = elf_ppnt->p_vaddr;
844 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
845 elf_flags |= MAP_FIXED;
846 } else if (loc->elf_ex.e_type == ET_DYN) {
847 /* Try and get dynamic programs out of the way of the default mmap
848 base, as well as whatever program they might try to exec. This
849 is because the brk will follow the loader, and is not movable. */
850 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
851 }
852
853 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
854 if (BAD_ADDR(error)) {
855 send_sig(SIGKILL, current, 0);
856 goto out_free_dentry;
857 }
858
859 if (!load_addr_set) {
860 load_addr_set = 1;
861 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
862 if (loc->elf_ex.e_type == ET_DYN) {
863 load_bias += error -
864 ELF_PAGESTART(load_bias + vaddr);
865 load_addr += load_bias;
866 reloc_func_desc = load_bias;
867 }
868 }
869 k = elf_ppnt->p_vaddr;
870 if (k < start_code) start_code = k;
871 if (start_data < k) start_data = k;
872
873 /*
874 * Check to see if the section's size will overflow the
875 * allowed task size. Note that p_filesz must always be
876 * <= p_memsz so it is only necessary to check p_memsz.
877 */
878 if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
879 elf_ppnt->p_memsz > TASK_SIZE ||
880 TASK_SIZE - elf_ppnt->p_memsz < k) {
881 /* set_brk can never work. Avoid overflows. */
882 send_sig(SIGKILL, current, 0);
883 goto out_free_dentry;
884 }
885
886 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
887
888 if (k > elf_bss)
889 elf_bss = k;
890 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
891 end_code = k;
892 if (end_data < k)
893 end_data = k;
894 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
895 if (k > elf_brk)
896 elf_brk = k;
897 }
898
899 loc->elf_ex.e_entry += load_bias;
900 elf_bss += load_bias;
901 elf_brk += load_bias;
902 start_code += load_bias;
903 end_code += load_bias;
904 start_data += load_bias;
905 end_data += load_bias;
906
907 /* Calling set_brk effectively mmaps the pages that we need
908 * for the bss and break sections. We must do this before
909 * mapping in the interpreter, to make sure it doesn't wind
910 * up getting placed where the bss needs to go.
911 */
912 retval = set_brk(elf_bss, elf_brk);
913 if (retval) {
914 send_sig(SIGKILL, current, 0);
915 goto out_free_dentry;
916 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700917 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 send_sig(SIGSEGV, current, 0);
919 retval = -EFAULT; /* Nobody gets to see this, but.. */
920 goto out_free_dentry;
921 }
922
923 if (elf_interpreter) {
924 if (interpreter_type == INTERPRETER_AOUT)
925 elf_entry = load_aout_interp(&loc->interp_ex,
926 interpreter);
927 else
928 elf_entry = load_elf_interp(&loc->interp_elf_ex,
929 interpreter,
930 &interp_load_addr);
931 if (BAD_ADDR(elf_entry)) {
932 printk(KERN_ERR "Unable to load interpreter %.128s\n",
933 elf_interpreter);
934 force_sig(SIGSEGV, current);
935 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
936 goto out_free_dentry;
937 }
938 reloc_func_desc = interp_load_addr;
939
940 allow_write_access(interpreter);
941 fput(interpreter);
942 kfree(elf_interpreter);
943 } else {
944 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100945 if (BAD_ADDR(elf_entry)) {
946 send_sig(SIGSEGV, current, 0);
947 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
948 goto out_free_dentry;
949 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 }
951
952 kfree(elf_phdata);
953
954 if (interpreter_type != INTERPRETER_AOUT)
955 sys_close(elf_exec_fileno);
956
957 set_binfmt(&elf_format);
958
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700959#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
960 retval = arch_setup_additional_pages(bprm, executable_stack);
961 if (retval < 0) {
962 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baf2005-04-28 15:17:19 -0700963 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700964 }
965#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
966
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 compute_creds(bprm);
968 current->flags &= ~PF_FORKNOEXEC;
969 create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT),
970 load_addr, interp_load_addr);
971 /* N.B. passed_fileno might not be initialized? */
972 if (interpreter_type == INTERPRETER_AOUT)
973 current->mm->arg_start += strlen(passed_fileno) + 1;
974 current->mm->end_code = end_code;
975 current->mm->start_code = start_code;
976 current->mm->start_data = start_data;
977 current->mm->end_data = end_data;
978 current->mm->start_stack = bprm->p;
979
980 if (current->personality & MMAP_PAGE_ZERO) {
981 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
982 and some applications "depend" upon this behavior.
983 Since we do not have the power to recompile these, we
984 emulate the SVr4 behavior. Sigh. */
985 down_write(&current->mm->mmap_sem);
986 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
987 MAP_FIXED | MAP_PRIVATE, 0);
988 up_write(&current->mm->mmap_sem);
989 }
990
991#ifdef ELF_PLAT_INIT
992 /*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
1001 */
1002 ELF_PLAT_INIT(regs, reloc_func_desc);
1003#endif
1004
1005 start_thread(regs, elf_entry, bprm->p);
1006 if (unlikely(current->ptrace & PT_PTRACED)) {
1007 if (current->ptrace & PT_TRACE_EXEC)
1008 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1009 else
1010 send_sig(SIGTRAP, current, 0);
1011 }
1012 retval = 0;
1013out:
1014 kfree(loc);
1015out_ret:
1016 return retval;
1017
1018 /* error cleanup */
1019out_free_dentry:
1020 allow_write_access(interpreter);
1021 if (interpreter)
1022 fput(interpreter);
1023out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001024 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025out_free_file:
1026 sys_close(elf_exec_fileno);
1027out_free_fh:
1028 if (files) {
1029 put_files_struct(current->files);
1030 current->files = files;
1031 }
1032out_free_ph:
1033 kfree(elf_phdata);
1034 goto out;
1035}
1036
1037/* This is really simpleminded and specialized - we are loading an
1038 a.out library that is given an ELF header. */
1039
1040static int load_elf_library(struct file *file)
1041{
1042 struct elf_phdr *elf_phdata;
1043 struct elf_phdr *eppnt;
1044 unsigned long elf_bss, bss, len;
1045 int retval, error, i, j;
1046 struct elfhdr elf_ex;
1047
1048 error = -ENOEXEC;
1049 retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
1050 if (retval != sizeof(elf_ex))
1051 goto out;
1052
1053 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1054 goto out;
1055
1056 /* First of all, some simple consistency checks */
1057 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1058 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1059 goto out;
1060
1061 /* Now read in all of the header information */
1062
1063 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1064 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1065
1066 error = -ENOMEM;
1067 elf_phdata = kmalloc(j, GFP_KERNEL);
1068 if (!elf_phdata)
1069 goto out;
1070
1071 eppnt = elf_phdata;
1072 error = -ENOEXEC;
1073 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1074 if (retval != j)
1075 goto out_free_ph;
1076
1077 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1078 if ((eppnt + i)->p_type == PT_LOAD)
1079 j++;
1080 if (j != 1)
1081 goto out_free_ph;
1082
1083 while (eppnt->p_type != PT_LOAD)
1084 eppnt++;
1085
1086 /* Now use mmap to map the library into memory. */
1087 down_write(&current->mm->mmap_sem);
1088 error = do_mmap(file,
1089 ELF_PAGESTART(eppnt->p_vaddr),
1090 (eppnt->p_filesz +
1091 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1092 PROT_READ | PROT_WRITE | PROT_EXEC,
1093 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1094 (eppnt->p_offset -
1095 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1096 up_write(&current->mm->mmap_sem);
1097 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1098 goto out_free_ph;
1099
1100 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1101 if (padzero(elf_bss)) {
1102 error = -EFAULT;
1103 goto out_free_ph;
1104 }
1105
1106 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1);
1107 bss = eppnt->p_memsz + eppnt->p_vaddr;
1108 if (bss > len) {
1109 down_write(&current->mm->mmap_sem);
1110 do_brk(len, bss - len);
1111 up_write(&current->mm->mmap_sem);
1112 }
1113 error = 0;
1114
1115out_free_ph:
1116 kfree(elf_phdata);
1117out:
1118 return error;
1119}
1120
1121/*
1122 * Note that some platforms still use traditional core dumps and not
1123 * the ELF core dump. Each platform can select it as appropriate.
1124 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001125#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126
1127/*
1128 * ELF core dumper
1129 *
1130 * Modelled on fs/exec.c:aout_core_dump()
1131 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1132 */
1133/*
1134 * These are the only things you should do on a core-file: use only these
1135 * functions to write out all the necessary info.
1136 */
1137static int dump_write(struct file *file, const void *addr, int nr)
1138{
1139 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1140}
1141
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001142static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143{
1144 if (file->f_op->llseek) {
1145 if (file->f_op->llseek(file, off, 0) != off)
1146 return 0;
1147 } else
1148 file->f_pos = off;
1149 return 1;
1150}
1151
1152/*
1153 * Decide whether a segment is worth dumping; default is yes to be
1154 * sure (missing info is worse than too much; etc).
1155 * Personally I'd include everything, and use the coredump limit...
1156 *
1157 * I think we should skip something. But I am not sure how. H.J.
1158 */
1159static int maydump(struct vm_area_struct *vma)
1160{
1161 /* Do not dump I/O mapped devices or special mappings */
1162 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1163 return 0;
1164
1165 /* Dump shared memory only if mapped from an anonymous file. */
1166 if (vma->vm_flags & VM_SHARED)
1167 return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1168
1169 /* If it hasn't been written to, don't write it out */
1170 if (!vma->anon_vma)
1171 return 0;
1172
1173 return 1;
1174}
1175
1176#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
1177
1178/* An ELF note in memory */
1179struct memelfnote
1180{
1181 const char *name;
1182 int type;
1183 unsigned int datasz;
1184 void *data;
1185};
1186
1187static int notesize(struct memelfnote *en)
1188{
1189 int sz;
1190
1191 sz = sizeof(struct elf_note);
1192 sz += roundup(strlen(en->name) + 1, 4);
1193 sz += roundup(en->datasz, 4);
1194
1195 return sz;
1196}
1197
1198#define DUMP_WRITE(addr, nr) \
1199 do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1200#define DUMP_SEEK(off) \
1201 do { if (!dump_seek(file, (off))) return 0; } while(0)
1202
1203static int writenote(struct memelfnote *men, struct file *file)
1204{
1205 struct elf_note en;
1206
1207 en.n_namesz = strlen(men->name) + 1;
1208 en.n_descsz = men->datasz;
1209 en.n_type = men->type;
1210
1211 DUMP_WRITE(&en, sizeof(en));
1212 DUMP_WRITE(men->name, en.n_namesz);
1213 /* XXX - cast from long long to long to avoid need for libgcc.a */
1214 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1215 DUMP_WRITE(men->data, men->datasz);
1216 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1217
1218 return 1;
1219}
1220#undef DUMP_WRITE
1221#undef DUMP_SEEK
1222
1223#define DUMP_WRITE(addr, nr) \
1224 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1225 goto end_coredump;
1226#define DUMP_SEEK(off) \
1227 if (!dump_seek(file, (off))) \
1228 goto end_coredump;
1229
Arjan van de Ven858119e2006-01-14 13:20:43 -08001230static void fill_elf_header(struct elfhdr *elf, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231{
1232 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1233 elf->e_ident[EI_CLASS] = ELF_CLASS;
1234 elf->e_ident[EI_DATA] = ELF_DATA;
1235 elf->e_ident[EI_VERSION] = EV_CURRENT;
1236 elf->e_ident[EI_OSABI] = ELF_OSABI;
1237 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1238
1239 elf->e_type = ET_CORE;
1240 elf->e_machine = ELF_ARCH;
1241 elf->e_version = EV_CURRENT;
1242 elf->e_entry = 0;
1243 elf->e_phoff = sizeof(struct elfhdr);
1244 elf->e_shoff = 0;
1245 elf->e_flags = ELF_CORE_EFLAGS;
1246 elf->e_ehsize = sizeof(struct elfhdr);
1247 elf->e_phentsize = sizeof(struct elf_phdr);
1248 elf->e_phnum = segs;
1249 elf->e_shentsize = 0;
1250 elf->e_shnum = 0;
1251 elf->e_shstrndx = 0;
1252 return;
1253}
1254
Arjan van de Ven858119e2006-01-14 13:20:43 -08001255static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256{
1257 phdr->p_type = PT_NOTE;
1258 phdr->p_offset = offset;
1259 phdr->p_vaddr = 0;
1260 phdr->p_paddr = 0;
1261 phdr->p_filesz = sz;
1262 phdr->p_memsz = 0;
1263 phdr->p_flags = 0;
1264 phdr->p_align = 0;
1265 return;
1266}
1267
1268static void fill_note(struct memelfnote *note, const char *name, int type,
1269 unsigned int sz, void *data)
1270{
1271 note->name = name;
1272 note->type = type;
1273 note->datasz = sz;
1274 note->data = data;
1275 return;
1276}
1277
1278/*
1279 * fill up all the fields in prstatus from the given task struct, except registers
1280 * which need to be filled up separately.
1281 */
1282static void fill_prstatus(struct elf_prstatus *prstatus,
1283 struct task_struct *p, long signr)
1284{
1285 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1286 prstatus->pr_sigpend = p->pending.signal.sig[0];
1287 prstatus->pr_sighold = p->blocked.sig[0];
1288 prstatus->pr_pid = p->pid;
1289 prstatus->pr_ppid = p->parent->pid;
1290 prstatus->pr_pgrp = process_group(p);
1291 prstatus->pr_sid = p->signal->session;
1292 if (thread_group_leader(p)) {
1293 /*
1294 * This is the record for the group leader. Add in the
1295 * cumulative times of previous dead threads. This total
1296 * won't include the time of each live thread whose state
1297 * is included in the core dump. The final total reported
1298 * to our parent process when it calls wait4 will include
1299 * those sums as well as the little bit more time it takes
1300 * this and each other thread to finish dying after the
1301 * core dump synchronization phase.
1302 */
1303 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1304 &prstatus->pr_utime);
1305 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1306 &prstatus->pr_stime);
1307 } else {
1308 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1309 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1310 }
1311 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1312 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1313}
1314
1315static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1316 struct mm_struct *mm)
1317{
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -07001318 unsigned int i, len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
1320 /* first copy the parameters from user space */
1321 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1322
1323 len = mm->arg_end - mm->arg_start;
1324 if (len >= ELF_PRARGSZ)
1325 len = ELF_PRARGSZ-1;
1326 if (copy_from_user(&psinfo->pr_psargs,
1327 (const char __user *)mm->arg_start, len))
1328 return -EFAULT;
1329 for(i = 0; i < len; i++)
1330 if (psinfo->pr_psargs[i] == 0)
1331 psinfo->pr_psargs[i] = ' ';
1332 psinfo->pr_psargs[len] = 0;
1333
1334 psinfo->pr_pid = p->pid;
1335 psinfo->pr_ppid = p->parent->pid;
1336 psinfo->pr_pgrp = process_group(p);
1337 psinfo->pr_sid = p->signal->session;
1338
1339 i = p->state ? ffz(~p->state) + 1 : 0;
1340 psinfo->pr_state = i;
Carsten Otte55148542006-03-25 03:08:22 -08001341 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1343 psinfo->pr_nice = task_nice(p);
1344 psinfo->pr_flag = p->flags;
1345 SET_UID(psinfo->pr_uid, p->uid);
1346 SET_GID(psinfo->pr_gid, p->gid);
1347 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1348
1349 return 0;
1350}
1351
1352/* Here is the structure in which status of each thread is captured. */
1353struct elf_thread_status
1354{
1355 struct list_head list;
1356 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1357 elf_fpregset_t fpu; /* NT_PRFPREG */
1358 struct task_struct *thread;
1359#ifdef ELF_CORE_COPY_XFPREGS
1360 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1361#endif
1362 struct memelfnote notes[3];
1363 int num_notes;
1364};
1365
1366/*
1367 * In order to add the specific thread information for the elf file format,
1368 * we need to keep a linked list of every threads pr_status and then
1369 * create a single section for them in the final core file.
1370 */
1371static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1372{
1373 int sz = 0;
1374 struct task_struct *p = t->thread;
1375 t->num_notes = 0;
1376
1377 fill_prstatus(&t->prstatus, p, signr);
1378 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1379
1380 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
1381 t->num_notes++;
1382 sz += notesize(&t->notes[0]);
1383
1384 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu))) {
1385 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
1386 t->num_notes++;
1387 sz += notesize(&t->notes[1]);
1388 }
1389
1390#ifdef ELF_CORE_COPY_XFPREGS
1391 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1392 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &t->xfpu);
1393 t->num_notes++;
1394 sz += notesize(&t->notes[2]);
1395 }
1396#endif
1397 return sz;
1398}
1399
1400/*
1401 * Actual dumper
1402 *
1403 * This is a two-pass process; first we find the offsets of the bits,
1404 * and then they are actually written out. If we run out of core limit
1405 * we just truncate.
1406 */
1407static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
1408{
1409#define NUM_NOTES 6
1410 int has_dumped = 0;
1411 mm_segment_t fs;
1412 int segs;
1413 size_t size = 0;
1414 int i;
1415 struct vm_area_struct *vma;
1416 struct elfhdr *elf = NULL;
1417 off_t offset = 0, dataoff;
1418 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1419 int numnote;
1420 struct memelfnote *notes = NULL;
1421 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1422 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1423 struct task_struct *g, *p;
1424 LIST_HEAD(thread_list);
1425 struct list_head *t;
1426 elf_fpregset_t *fpu = NULL;
1427#ifdef ELF_CORE_COPY_XFPREGS
1428 elf_fpxregset_t *xfpu = NULL;
1429#endif
1430 int thread_status_size = 0;
1431 elf_addr_t *auxv;
1432
1433 /*
1434 * We no longer stop all VM operations.
1435 *
1436 * This is because those proceses that could possibly change map_count or
1437 * the mmap / vma pages are now blocked in do_exit on current finishing
1438 * this core dump.
1439 *
1440 * Only ptrace can touch these memory addresses, but it doesn't change
1441 * the map_count or the pages allocated. So no possibility of crashing
1442 * exists while dumping the mm->vm_next areas to the core file.
1443 */
1444
1445 /* alloc memory for large data structures: too large to be on stack */
1446 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1447 if (!elf)
1448 goto cleanup;
1449 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1450 if (!prstatus)
1451 goto cleanup;
1452 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1453 if (!psinfo)
1454 goto cleanup;
1455 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1456 if (!notes)
1457 goto cleanup;
1458 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1459 if (!fpu)
1460 goto cleanup;
1461#ifdef ELF_CORE_COPY_XFPREGS
1462 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1463 if (!xfpu)
1464 goto cleanup;
1465#endif
1466
1467 if (signr) {
1468 struct elf_thread_status *tmp;
1469 read_lock(&tasklist_lock);
1470 do_each_thread(g,p)
1471 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001472 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 if (!tmp) {
1474 read_unlock(&tasklist_lock);
1475 goto cleanup;
1476 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 INIT_LIST_HEAD(&tmp->list);
1478 tmp->thread = p;
1479 list_add(&tmp->list, &thread_list);
1480 }
1481 while_each_thread(g,p);
1482 read_unlock(&tasklist_lock);
1483 list_for_each(t, &thread_list) {
1484 struct elf_thread_status *tmp;
1485 int sz;
1486
1487 tmp = list_entry(t, struct elf_thread_status, list);
1488 sz = elf_dump_thread_status(signr, tmp);
1489 thread_status_size += sz;
1490 }
1491 }
1492 /* now collect the dump for the current */
1493 memset(prstatus, 0, sizeof(*prstatus));
1494 fill_prstatus(prstatus, current, signr);
1495 elf_core_copy_regs(&prstatus->pr_reg, regs);
1496
1497 segs = current->mm->map_count;
1498#ifdef ELF_CORE_EXTRA_PHDRS
1499 segs += ELF_CORE_EXTRA_PHDRS;
1500#endif
1501
1502 /* Set up header */
1503 fill_elf_header(elf, segs+1); /* including notes section */
1504
1505 has_dumped = 1;
1506 current->flags |= PF_DUMPCORE;
1507
1508 /*
1509 * Set up the notes in similar form to SVR4 core dumps made
1510 * with info from their /proc.
1511 */
1512
1513 fill_note(notes +0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1514
1515 fill_psinfo(psinfo, current->group_leader, current->mm);
1516 fill_note(notes +1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1517
Eric W. Biedermana9289722005-10-30 15:02:08 -08001518 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
1520 auxv = (elf_addr_t *) current->mm->saved_auxv;
1521
1522 i = 0;
1523 do
1524 i += 2;
1525 while (auxv[i - 2] != AT_NULL);
1526 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1527 i * sizeof (elf_addr_t), auxv);
1528
1529 /* Try to dump the FPU. */
1530 if ((prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, fpu)))
1531 fill_note(notes + numnote++,
1532 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1533#ifdef ELF_CORE_COPY_XFPREGS
1534 if (elf_core_copy_task_xfpregs(current, xfpu))
1535 fill_note(notes + numnote++,
1536 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1537#endif
1538
1539 fs = get_fs();
1540 set_fs(KERNEL_DS);
1541
1542 DUMP_WRITE(elf, sizeof(*elf));
1543 offset += sizeof(*elf); /* Elf header */
1544 offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */
1545
1546 /* Write notes phdr entry */
1547 {
1548 struct elf_phdr phdr;
1549 int sz = 0;
1550
1551 for (i = 0; i < numnote; i++)
1552 sz += notesize(notes + i);
1553
1554 sz += thread_status_size;
1555
1556 fill_elf_note_phdr(&phdr, sz, offset);
1557 offset += sz;
1558 DUMP_WRITE(&phdr, sizeof(phdr));
1559 }
1560
1561 /* Page-align dumped data */
1562 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1563
1564 /* Write program headers for segments dump */
1565 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1566 struct elf_phdr phdr;
1567 size_t sz;
1568
1569 sz = vma->vm_end - vma->vm_start;
1570
1571 phdr.p_type = PT_LOAD;
1572 phdr.p_offset = offset;
1573 phdr.p_vaddr = vma->vm_start;
1574 phdr.p_paddr = 0;
1575 phdr.p_filesz = maydump(vma) ? sz : 0;
1576 phdr.p_memsz = sz;
1577 offset += phdr.p_filesz;
1578 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1579 if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
1580 if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
1581 phdr.p_align = ELF_EXEC_PAGESIZE;
1582
1583 DUMP_WRITE(&phdr, sizeof(phdr));
1584 }
1585
1586#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1587 ELF_CORE_WRITE_EXTRA_PHDRS;
1588#endif
1589
1590 /* write out the notes section */
1591 for (i = 0; i < numnote; i++)
1592 if (!writenote(notes + i, file))
1593 goto end_coredump;
1594
1595 /* write out the thread status notes section */
1596 list_for_each(t, &thread_list) {
1597 struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
1598 for (i = 0; i < tmp->num_notes; i++)
1599 if (!writenote(&tmp->notes[i], file))
1600 goto end_coredump;
1601 }
1602
1603 DUMP_SEEK(dataoff);
1604
1605 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1606 unsigned long addr;
1607
1608 if (!maydump(vma))
1609 continue;
1610
1611 for (addr = vma->vm_start;
1612 addr < vma->vm_end;
1613 addr += PAGE_SIZE) {
1614 struct page* page;
1615 struct vm_area_struct *vma;
1616
1617 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1618 &page, &vma) <= 0) {
1619 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1620 } else {
1621 if (page == ZERO_PAGE(addr)) {
1622 DUMP_SEEK (file->f_pos + PAGE_SIZE);
1623 } else {
1624 void *kaddr;
1625 flush_cache_page(vma, addr, page_to_pfn(page));
1626 kaddr = kmap(page);
1627 if ((size += PAGE_SIZE) > limit ||
1628 !dump_write(file, kaddr,
1629 PAGE_SIZE)) {
1630 kunmap(page);
1631 page_cache_release(page);
1632 goto end_coredump;
1633 }
1634 kunmap(page);
1635 }
1636 page_cache_release(page);
1637 }
1638 }
1639 }
1640
1641#ifdef ELF_CORE_WRITE_EXTRA_DATA
1642 ELF_CORE_WRITE_EXTRA_DATA;
1643#endif
1644
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001645 if ((off_t)file->f_pos != offset) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 /* Sanity check */
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001647 printk(KERN_WARNING "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1648 (off_t)file->f_pos, offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 }
1650
1651end_coredump:
1652 set_fs(fs);
1653
1654cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001655 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 struct list_head *tmp = thread_list.next;
1657 list_del(tmp);
1658 kfree(list_entry(tmp, struct elf_thread_status, list));
1659 }
1660
1661 kfree(elf);
1662 kfree(prstatus);
1663 kfree(psinfo);
1664 kfree(notes);
1665 kfree(fpu);
1666#ifdef ELF_CORE_COPY_XFPREGS
1667 kfree(xfpu);
1668#endif
1669 return has_dumped;
1670#undef NUM_NOTES
1671}
1672
1673#endif /* USE_ELF_CORE_DUMP */
1674
1675static int __init init_elf_binfmt(void)
1676{
1677 return register_binfmt(&elf_format);
1678}
1679
1680static void __exit exit_elf_binfmt(void)
1681{
1682 /* Remove the COFF and ELF loaders. */
1683 unregister_binfmt(&elf_format);
1684}
1685
1686core_initcall(init_elf_binfmt);
1687module_exit(exit_elf_binfmt);
1688MODULE_LICENSE("GPL");