/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/ptrace.h>
27#include <linux/slab.h>
28#include <linux/shm.h>
29#include <linux/personality.h>
30#include <linux/elfcore.h>
31#include <linux/init.h>
32#include <linux/highuid.h>
33#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/compiler.h>
35#include <linux/highmem.h>
36#include <linux/pagemap.h>
37#include <linux/security.h>
38#include <linux/syscalls.h>
39#include <linux/random.h>
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070040#include <linux/elf.h>
Alexey Dobriyan7e80d0d2007-05-08 00:28:59 -070041#include <linux/utsname.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <asm/uaccess.h>
43#include <asm/param.h>
44#include <asm/page.h>
45
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070046static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47static int load_elf_library(struct file *);
Andrew Mortond4e3cc32007-07-21 04:37:32 -070048static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump	NULL
#endif

/* Segment alignment: at least a page, or the ELF exec page size if larger. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/* Round an address down / extract the offset within / round up,
 * all relative to ELF_MIN_ALIGN boundaries. */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

/* Binary-format handler registered with the binfmt core. */
static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

/* Any address at or beyond TASK_SIZE is invalid for user space. */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int set_brk(unsigned long start, unsigned long end)
86{
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99}
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101/* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700104 be in memory
105 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static int padzero(unsigned long elf_bss)
107{
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117}
118
/* Let's use some macros to make this stack manipulation a little clearer.
 * STACK_ADD moves by a number of elf_addr_t items, STACK_ROUND aligns the
 * stack pointer to 16 bytes, and STACK_ALLOC carves out 'len' bytes and
 * yields the address of the carved region. */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
133
/*
 * Lay out the initial user stack for the new image: the optional
 * platform string, the ELF auxiliary vector (saved in
 * current->mm->saved_auxv and copied to the stack), argc, the argv and
 * envp pointer arrays, and the arg/env region markers in the mm.
 * Returns 0 on success, -EFAULT if any user-space access fails.
 * NOTE(review): an over-long or unterminated argv/envp string makes this
 * return 0 early with the tables half-built - presumably the process
 * faults at start; confirm this is the intended failure mode.
 */
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;
	struct vm_area_struct *vma;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry. */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place. */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
294
295#ifndef elf_map
296
297static unsigned long elf_map(struct file *filep, unsigned long addr,
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700298 struct elf_phdr *eppnt, int prot, int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299{
300 unsigned long map_addr;
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700301 unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700302
303 down_write(&current->mm->mmap_sem);
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700304 /* mmap() will return -EINVAL if given a zero size, but a
305 * segment with zero filesize is perfectly valid */
306 if (eppnt->p_filesz + pageoffset)
307 map_addr = do_mmap(filep, ELF_PAGESTART(addr),
308 eppnt->p_filesz + pageoffset, prot, type,
309 eppnt->p_offset - pageoffset);
310 else
311 map_addr = ELF_PAGESTART(addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 up_write(&current->mm->mmap_sem);
313 return(map_addr);
314}
315
316#endif /* !elf_map */
317
/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

/*
 * Map the ELF interpreter (e.g. the dynamic linker) into the current mm.
 * On success, stores the interpreter's load bias in *interp_load_addr
 * and returns its (relocated) entry address; on failure returns an
 * address for which BAD_ADDR() is true (~0UL or a negative errno cast
 * to unsigned long).
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata,size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	/* Map every PT_LOAD segment, tracking the load bias and the
	 * file/memory extents for the bss fix-up below. */
	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}
463
/*
 * Load an a.out format interpreter (deprecated path).  Reserves the
 * text+data region with do_brk(), reads the image in through the
 * interpreter file's ->read, then reserves its bss.  Returns the
 * interpreter's entry address, or ~0UL on any failure.
 * NOTE(review): the two do_brk() return values are not checked here -
 * a reservation failure would only surface via the subsequent read;
 * confirm this is intentional.
 */
static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user * addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	/* Pick the file offset / load address by a.out flavour. */
	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	/* Reserve the bss directly after the (page-aligned) text+data. */
	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}
509
/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

/* Interpreter kinds detected from PT_INTERP (bit flags, see below). */
#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
523static unsigned long randomize_stack_top(unsigned long stack_top)
524{
525 unsigned int random_variable = 0;
526
Andi Kleenc16b63e2006-09-26 10:52:28 +0200527 if ((current->flags & PF_RANDOMIZE) &&
528 !(current->personality & ADDR_NO_RANDOMIZE)) {
Andi Kleen913bd902006-03-25 16:29:09 +0100529 random_variable = get_random_int() & STACK_RND_MASK;
530 random_variable <<= PAGE_SHIFT;
531 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100533 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534#else
Andi Kleen913bd902006-03-25 16:29:09 +0100535 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536#endif
537}
538
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700539static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
541 struct file *interpreter = NULL; /* to shut gcc up */
542 unsigned long load_addr = 0, load_bias = 0;
543 int load_addr_set = 0;
544 char * elf_interpreter = NULL;
545 unsigned int interpreter_type = INTERPRETER_NONE;
546 unsigned char ibcs2_interpreter = 0;
547 unsigned long error;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700548 struct elf_phdr *elf_ppnt, *elf_phdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 unsigned long elf_bss, elf_brk;
550 int elf_exec_fileno;
551 int retval, i;
552 unsigned int size;
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700553 unsigned long elf_entry, interp_load_addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 unsigned long start_code, end_code, start_data, end_data;
555 unsigned long reloc_func_desc = 0;
556 char passed_fileno[6];
557 struct files_struct *files;
David Rientjes8de61e62006-12-06 20:40:16 -0800558 int executable_stack = EXSTACK_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 unsigned long def_flags = 0;
560 struct {
561 struct elfhdr elf_ex;
562 struct elfhdr interp_elf_ex;
563 struct exec interp_ex;
564 } *loc;
565
566 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
567 if (!loc) {
568 retval = -ENOMEM;
569 goto out_ret;
570 }
571
572 /* Get the exec-header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700573 loc->elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574
575 retval = -ENOEXEC;
576 /* First of all, some simple consistency checks */
577 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
578 goto out;
579
580 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
581 goto out;
582 if (!elf_check_arch(&loc->elf_ex))
583 goto out;
584 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
585 goto out;
586
587 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
589 goto out;
590 if (loc->elf_ex.e_phnum < 1 ||
591 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
592 goto out;
593 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
594 retval = -ENOMEM;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700595 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 if (!elf_phdata)
597 goto out;
598
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700599 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
600 (char *)elf_phdata, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 if (retval != size) {
602 if (retval >= 0)
603 retval = -EIO;
604 goto out_free_ph;
605 }
606
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700607 files = current->files; /* Refcounted so ok */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 retval = unshare_files();
609 if (retval < 0)
610 goto out_free_ph;
611 if (files == current->files) {
612 put_files_struct(files);
613 files = NULL;
614 }
615
616 /* exec will make our files private anyway, but for the a.out
617 loader stuff we need to do it earlier */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 retval = get_unused_fd();
619 if (retval < 0)
620 goto out_free_fh;
621 get_file(bprm->file);
622 fd_install(elf_exec_fileno = retval, bprm->file);
623
624 elf_ppnt = elf_phdata;
625 elf_bss = 0;
626 elf_brk = 0;
627
628 start_code = ~0UL;
629 end_code = 0;
630 start_data = 0;
631 end_data = 0;
632
633 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
634 if (elf_ppnt->p_type == PT_INTERP) {
635 /* This is the program interpreter used for
636 * shared libraries - for now assume that this
637 * is an a.out format binary
638 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 retval = -ENOEXEC;
640 if (elf_ppnt->p_filesz > PATH_MAX ||
641 elf_ppnt->p_filesz < 2)
642 goto out_free_file;
643
644 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800645 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700646 GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 if (!elf_interpreter)
648 goto out_free_file;
649
650 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700651 elf_interpreter,
652 elf_ppnt->p_filesz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 if (retval != elf_ppnt->p_filesz) {
654 if (retval >= 0)
655 retval = -EIO;
656 goto out_free_interp;
657 }
658 /* make sure path is NULL terminated */
659 retval = -ENOEXEC;
660 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
661 goto out_free_interp;
662
663 /* If the program interpreter is one of these two,
664 * then assume an iBCS2 image. Otherwise assume
665 * a native linux image.
666 */
667 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
668 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
669 ibcs2_interpreter = 1;
670
671 /*
672 * The early SET_PERSONALITY here is so that the lookup
673 * for the interpreter happens in the namespace of the
674 * to-be-execed image. SET_PERSONALITY can select an
675 * alternate root.
676 *
677 * However, SET_PERSONALITY is NOT allowed to switch
678 * this task into the new images's memory mapping
679 * policy - that is, TASK_SIZE must still evaluate to
680 * that which is appropriate to the execing application.
681 * This is because exit_mmap() needs to have TASK_SIZE
682 * evaluate to the size of the old image.
683 *
684 * So if (say) a 64-bit application is execing a 32-bit
685 * application it is the architecture's responsibility
686 * to defer changing the value of TASK_SIZE until the
687 * switch really is going to happen - do this in
688 * flush_thread(). - akpm
689 */
690 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
691
692 interpreter = open_exec(elf_interpreter);
693 retval = PTR_ERR(interpreter);
694 if (IS_ERR(interpreter))
695 goto out_free_interp;
Alexey Dobriyan1fb84492007-01-26 00:57:16 -0800696
697 /*
698 * If the binary is not readable then enforce
699 * mm->dumpable = 0 regardless of the interpreter's
700 * permissions.
701 */
702 if (file_permission(interpreter, MAY_READ) < 0)
703 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
704
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700705 retval = kernel_read(interpreter, 0, bprm->buf,
706 BINPRM_BUF_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 if (retval != BINPRM_BUF_SIZE) {
708 if (retval >= 0)
709 retval = -EIO;
710 goto out_free_dentry;
711 }
712
713 /* Get the exec headers */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700714 loc->interp_ex = *((struct exec *)bprm->buf);
715 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 break;
717 }
718 elf_ppnt++;
719 }
720
721 elf_ppnt = elf_phdata;
722 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
723 if (elf_ppnt->p_type == PT_GNU_STACK) {
724 if (elf_ppnt->p_flags & PF_X)
725 executable_stack = EXSTACK_ENABLE_X;
726 else
727 executable_stack = EXSTACK_DISABLE_X;
728 break;
729 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
731 /* Some simple consistency checks for the interpreter */
732 if (elf_interpreter) {
Andi Kleen8e9073e2007-10-16 23:26:48 -0700733 static int warn;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
735
736 /* Now figure out which format our binary is */
737 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
738 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
739 (N_MAGIC(loc->interp_ex) != QMAGIC))
740 interpreter_type = INTERPRETER_ELF;
741
742 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
743 interpreter_type &= ~INTERPRETER_ELF;
744
Andi Kleen8e9073e2007-10-16 23:26:48 -0700745 if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
746 printk(KERN_WARNING "a.out ELF interpreter %s is "
747 "deprecated and will not be supported "
748 "after Linux 2.6.25\n", elf_interpreter);
749 warn++;
750 }
751
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 retval = -ELIBBAD;
753 if (!interpreter_type)
754 goto out_free_dentry;
755
756 /* Make sure only one type was selected */
757 if ((interpreter_type & INTERPRETER_ELF) &&
758 interpreter_type != INTERPRETER_ELF) {
759 // FIXME - ratelimit this before re-enabling
760 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
761 interpreter_type = INTERPRETER_ELF;
762 }
763 /* Verify the interpreter has a valid arch */
764 if ((interpreter_type == INTERPRETER_ELF) &&
765 !elf_check_arch(&loc->interp_elf_ex))
766 goto out_free_dentry;
767 } else {
768 /* Executables without an interpreter also need a personality */
769 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
770 }
771
772 /* OK, we are done with that, now set up the arg stuff,
773 and then start this sucker up */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
775 char *passed_p = passed_fileno;
776 sprintf(passed_fileno, "%d", elf_exec_fileno);
777
778 if (elf_interpreter) {
779 retval = copy_strings_kernel(1, &passed_p, bprm);
780 if (retval)
781 goto out_free_dentry;
782 bprm->argc++;
783 }
784 }
785
786 /* Flush all traces of the currently running executable */
787 retval = flush_old_exec(bprm);
788 if (retval)
789 goto out_free_dentry;
790
791 /* Discard our unneeded old files struct */
792 if (files) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 put_files_struct(files);
794 files = NULL;
795 }
796
797 /* OK, This is the point of no return */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 current->flags &= ~PF_FORKNOEXEC;
799 current->mm->def_flags = def_flags;
800
801 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
802 may depend on the personality. */
803 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
804 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
805 current->personality |= READ_IMPLIES_EXEC;
806
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700807 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 current->flags |= PF_RANDOMIZE;
809 arch_pick_mmap_layout(current->mm);
810
811 /* Do this so that we can load the interpreter, if need be. We will
812 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700814 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
816 executable_stack);
817 if (retval < 0) {
818 send_sig(SIGKILL, current, 0);
819 goto out_free_dentry;
820 }
821
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 current->mm->start_stack = bprm->p;
823
824 /* Now we do a little grungy work by mmaping the ELF image into
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700825 the correct location in memory. At this point, we assume that
826 the image should be loaded at fixed address, not at a variable
827 address. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700828 for(i = 0, elf_ppnt = elf_phdata;
829 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 int elf_prot = 0, elf_flags;
831 unsigned long k, vaddr;
832
833 if (elf_ppnt->p_type != PT_LOAD)
834 continue;
835
836 if (unlikely (elf_brk > elf_bss)) {
837 unsigned long nbyte;
838
839 /* There was a PT_LOAD segment with p_memsz > p_filesz
840 before this one. Map anonymous pages, if needed,
841 and clear the area. */
842 retval = set_brk (elf_bss + load_bias,
843 elf_brk + load_bias);
844 if (retval) {
845 send_sig(SIGKILL, current, 0);
846 goto out_free_dentry;
847 }
848 nbyte = ELF_PAGEOFFSET(elf_bss);
849 if (nbyte) {
850 nbyte = ELF_MIN_ALIGN - nbyte;
851 if (nbyte > elf_brk - elf_bss)
852 nbyte = elf_brk - elf_bss;
853 if (clear_user((void __user *)elf_bss +
854 load_bias, nbyte)) {
855 /*
856 * This bss-zeroing can fail if the ELF
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700857 * file specifies odd protections. So
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 * we don't check the return value
859 */
860 }
861 }
862 }
863
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700864 if (elf_ppnt->p_flags & PF_R)
865 elf_prot |= PROT_READ;
866 if (elf_ppnt->p_flags & PF_W)
867 elf_prot |= PROT_WRITE;
868 if (elf_ppnt->p_flags & PF_X)
869 elf_prot |= PROT_EXEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700871 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
873 vaddr = elf_ppnt->p_vaddr;
874 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
875 elf_flags |= MAP_FIXED;
876 } else if (loc->elf_ex.e_type == ET_DYN) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700877 /* Try and get dynamic programs out of the way of the
878 * default mmap base, as well as whatever program they
879 * might try to exec. This is because the brk will
880 * follow the loader, and is not movable. */
Linus Torvalds90cb28e2007-01-06 13:28:21 -0800881 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 }
883
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700884 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700885 elf_prot, elf_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 if (BAD_ADDR(error)) {
887 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700888 retval = IS_ERR((void *)error) ?
889 PTR_ERR((void*)error) : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 goto out_free_dentry;
891 }
892
893 if (!load_addr_set) {
894 load_addr_set = 1;
895 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
896 if (loc->elf_ex.e_type == ET_DYN) {
897 load_bias += error -
898 ELF_PAGESTART(load_bias + vaddr);
899 load_addr += load_bias;
900 reloc_func_desc = load_bias;
901 }
902 }
903 k = elf_ppnt->p_vaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700904 if (k < start_code)
905 start_code = k;
906 if (start_data < k)
907 start_data = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908
909 /*
910 * Check to see if the section's size will overflow the
911 * allowed task size. Note that p_filesz must always be
912 * <= p_memsz so it is only necessary to check p_memsz.
913 */
Chuck Ebbertce510592006-07-03 00:24:14 -0700914 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 elf_ppnt->p_memsz > TASK_SIZE ||
916 TASK_SIZE - elf_ppnt->p_memsz < k) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700917 /* set_brk can never work. Avoid overflows. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700919 retval = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 goto out_free_dentry;
921 }
922
923 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
924
925 if (k > elf_bss)
926 elf_bss = k;
927 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
928 end_code = k;
929 if (end_data < k)
930 end_data = k;
931 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
932 if (k > elf_brk)
933 elf_brk = k;
934 }
935
936 loc->elf_ex.e_entry += load_bias;
937 elf_bss += load_bias;
938 elf_brk += load_bias;
939 start_code += load_bias;
940 end_code += load_bias;
941 start_data += load_bias;
942 end_data += load_bias;
943
944 /* Calling set_brk effectively mmaps the pages that we need
945 * for the bss and break sections. We must do this before
946 * mapping in the interpreter, to make sure it doesn't wind
947 * up getting placed where the bss needs to go.
948 */
949 retval = set_brk(elf_bss, elf_brk);
950 if (retval) {
951 send_sig(SIGKILL, current, 0);
952 goto out_free_dentry;
953 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700954 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 send_sig(SIGSEGV, current, 0);
956 retval = -EFAULT; /* Nobody gets to see this, but.. */
957 goto out_free_dentry;
958 }
959
960 if (elf_interpreter) {
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700961 if (interpreter_type == INTERPRETER_AOUT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 elf_entry = load_aout_interp(&loc->interp_ex,
963 interpreter);
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700964 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 elf_entry = load_elf_interp(&loc->interp_elf_ex,
966 interpreter,
Andrew Mortond4e3cc32007-07-21 04:37:32 -0700967 &interp_load_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 if (BAD_ADDR(elf_entry)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 force_sig(SIGSEGV, current);
Chuck Ebbertce510592006-07-03 00:24:14 -0700970 retval = IS_ERR((void *)elf_entry) ?
971 (int)elf_entry : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 goto out_free_dentry;
973 }
974 reloc_func_desc = interp_load_addr;
975
976 allow_write_access(interpreter);
977 fput(interpreter);
978 kfree(elf_interpreter);
979 } else {
980 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100981 if (BAD_ADDR(elf_entry)) {
Chuck Ebbertce510592006-07-03 00:24:14 -0700982 force_sig(SIGSEGV, current);
983 retval = -EINVAL;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100984 goto out_free_dentry;
985 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
987
988 kfree(elf_phdata);
989
990 if (interpreter_type != INTERPRETER_AOUT)
991 sys_close(elf_exec_fileno);
992
993 set_binfmt(&elf_format);
994
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700995#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
996 retval = arch_setup_additional_pages(bprm, executable_stack);
997 if (retval < 0) {
998 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baf2005-04-28 15:17:19 -0700999 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -07001000 }
1001#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1002
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 compute_creds(bprm);
1004 current->flags &= ~PF_FORKNOEXEC;
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001005 retval = create_elf_tables(bprm, &loc->elf_ex,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001006 (interpreter_type == INTERPRETER_AOUT),
1007 load_addr, interp_load_addr);
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001008 if (retval < 0) {
1009 send_sig(SIGKILL, current, 0);
1010 goto out;
1011 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 /* N.B. passed_fileno might not be initialized? */
1013 if (interpreter_type == INTERPRETER_AOUT)
1014 current->mm->arg_start += strlen(passed_fileno) + 1;
1015 current->mm->end_code = end_code;
1016 current->mm->start_code = start_code;
1017 current->mm->start_data = start_data;
1018 current->mm->end_data = end_data;
1019 current->mm->start_stack = bprm->p;
1020
1021 if (current->personality & MMAP_PAGE_ZERO) {
1022 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1023 and some applications "depend" upon this behavior.
1024 Since we do not have the power to recompile these, we
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001025 emulate the SVr4 behavior. Sigh. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 down_write(&current->mm->mmap_sem);
1027 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1028 MAP_FIXED | MAP_PRIVATE, 0);
1029 up_write(&current->mm->mmap_sem);
1030 }
1031
1032#ifdef ELF_PLAT_INIT
1033 /*
1034 * The ABI may specify that certain registers be set up in special
1035 * ways (on i386 %edx is the address of a DT_FINI function, for
1036 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1037 * that the e_entry field is the address of the function descriptor
1038 * for the startup routine, rather than the address of the startup
1039 * routine itself. This macro performs whatever initialization to
1040 * the regs structure is required as well as any relocations to the
1041 * function descriptor entries when executing dynamically links apps.
1042 */
1043 ELF_PLAT_INIT(regs, reloc_func_desc);
1044#endif
1045
1046 start_thread(regs, elf_entry, bprm->p);
1047 if (unlikely(current->ptrace & PT_PTRACED)) {
1048 if (current->ptrace & PT_TRACE_EXEC)
1049 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1050 else
1051 send_sig(SIGTRAP, current, 0);
1052 }
1053 retval = 0;
1054out:
1055 kfree(loc);
1056out_ret:
1057 return retval;
1058
1059 /* error cleanup */
1060out_free_dentry:
1061 allow_write_access(interpreter);
1062 if (interpreter)
1063 fput(interpreter);
1064out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001065 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066out_free_file:
1067 sys_close(elf_exec_fileno);
1068out_free_fh:
Kirill Korotaev3b9b8ab2006-09-29 02:00:05 -07001069 if (files)
1070 reset_files_struct(current, files);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071out_free_ph:
1072 kfree(elf_phdata);
1073 goto out;
1074}
1075
1076/* This is really simpleminded and specialized - we are loading an
1077 a.out library that is given an ELF header. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078static int load_elf_library(struct file *file)
1079{
1080 struct elf_phdr *elf_phdata;
1081 struct elf_phdr *eppnt;
1082 unsigned long elf_bss, bss, len;
1083 int retval, error, i, j;
1084 struct elfhdr elf_ex;
1085
1086 error = -ENOEXEC;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001087 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 if (retval != sizeof(elf_ex))
1089 goto out;
1090
1091 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1092 goto out;
1093
1094 /* First of all, some simple consistency checks */
1095 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001096 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 goto out;
1098
1099 /* Now read in all of the header information */
1100
1101 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1102 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1103
1104 error = -ENOMEM;
1105 elf_phdata = kmalloc(j, GFP_KERNEL);
1106 if (!elf_phdata)
1107 goto out;
1108
1109 eppnt = elf_phdata;
1110 error = -ENOEXEC;
1111 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1112 if (retval != j)
1113 goto out_free_ph;
1114
1115 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1116 if ((eppnt + i)->p_type == PT_LOAD)
1117 j++;
1118 if (j != 1)
1119 goto out_free_ph;
1120
1121 while (eppnt->p_type != PT_LOAD)
1122 eppnt++;
1123
1124 /* Now use mmap to map the library into memory. */
1125 down_write(&current->mm->mmap_sem);
1126 error = do_mmap(file,
1127 ELF_PAGESTART(eppnt->p_vaddr),
1128 (eppnt->p_filesz +
1129 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1130 PROT_READ | PROT_WRITE | PROT_EXEC,
1131 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1132 (eppnt->p_offset -
1133 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1134 up_write(&current->mm->mmap_sem);
1135 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1136 goto out_free_ph;
1137
1138 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1139 if (padzero(elf_bss)) {
1140 error = -EFAULT;
1141 goto out_free_ph;
1142 }
1143
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001144 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1145 ELF_MIN_ALIGN - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 bss = eppnt->p_memsz + eppnt->p_vaddr;
1147 if (bss > len) {
1148 down_write(&current->mm->mmap_sem);
1149 do_brk(len, bss - len);
1150 up_write(&current->mm->mmap_sem);
1151 }
1152 error = 0;
1153
1154out_free_ph:
1155 kfree(elf_phdata);
1156out:
1157 return error;
1158}
1159
1160/*
1161 * Note that some platforms still use traditional core dumps and not
1162 * the ELF core dump. Each platform can select it as appropriate.
1163 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001164#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
1166/*
1167 * ELF core dumper
1168 *
1169 * Modelled on fs/exec.c:aout_core_dump()
1170 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1171 */
1172/*
1173 * These are the only things you should do on a core-file: use only these
1174 * functions to write out all the necessary info.
1175 */
1176static int dump_write(struct file *file, const void *addr, int nr)
1177{
1178 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1179}
1180
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001181static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182{
Andi Kleend025c9d2006-09-30 23:29:28 -07001183 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
Petr Vandrovec7f14daa2006-10-13 04:13:16 +02001184 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 return 0;
Andi Kleend025c9d2006-09-30 23:29:28 -07001186 } else {
1187 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1188 if (!buf)
1189 return 0;
1190 while (off > 0) {
1191 unsigned long n = off;
1192 if (n > PAGE_SIZE)
1193 n = PAGE_SIZE;
1194 if (!dump_write(file, buf, n))
1195 return 0;
1196 off -= n;
1197 }
1198 free_page((unsigned long)buf);
1199 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 return 1;
1201}
1202
1203/*
1204 * Decide whether a segment is worth dumping; default is yes to be
1205 * sure (missing info is worse than too much; etc).
1206 * Personally I'd include everything, and use the coredump limit...
1207 *
1208 * I think we should skip something. But I am not sure how. H.J.
1209 */
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001210static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211{
Roland McGrathe5b97dd2007-01-26 00:56:48 -08001212 /* The vma can be set up to tell us the answer directly. */
1213 if (vma->vm_flags & VM_ALWAYSDUMP)
1214 return 1;
1215
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 /* Do not dump I/O mapped devices or special mappings */
1217 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1218 return 0;
1219
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001220 /* By default, dump shared memory if mapped from an anonymous file. */
1221 if (vma->vm_flags & VM_SHARED) {
1222 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0)
1223 return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1224 else
1225 return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
1226 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001228 /* By default, if it hasn't been written to, don't write it out. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 if (!vma->anon_vma)
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001230 return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001232 return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233}
1234
/* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* note owner string, e.g. "CORE" */
	int type;		/* note type, e.g. NT_PRSTATUS */
	unsigned int datasz;	/* size of the descriptor data in bytes */
	void *data;		/* pointer to the descriptor data */
};
1243
1244static int notesize(struct memelfnote *en)
1245{
1246 int sz;
1247
1248 sz = sizeof(struct elf_note);
1249 sz += roundup(strlen(en->name) + 1, 4);
1250 sz += roundup(en->datasz, 4);
1251
1252 return sz;
1253}
1254
/* Write to the core file and advance *foffset; on failure this makes
 * the enclosing function return 0 (note the hidden return). */
#define DUMP_WRITE(addr, nr, foffset) \
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
/* Pad the core file with zero bytes up to the next 4-byte boundary.
 * Returns 1 on success; DUMP_WRITE returns 0 from here on failure. */
static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}
1264
/* Emit one note to the core file: header, name, then data, with name
 * and data each padded to 4 bytes.  Returns 1 on success, 0 on write
 * failure (via DUMP_WRITE's hidden return or alignfile). */
static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;	/* include the NUL */
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

/* Core-dump variants used by elf_core_dump: account the write against
 * the coredump size limit (size/limit are locals there) and bail out
 * to end_coredump on limit overrun or write/seek failure. */
#define DUMP_WRITE(addr, nr) \
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off) \
	if (!dump_seek(file, (off))) \
		goto end_coredump;
1291
Arjan van de Ven858119e2006-01-14 13:20:43 -08001292static void fill_elf_header(struct elfhdr *elf, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293{
1294 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1295 elf->e_ident[EI_CLASS] = ELF_CLASS;
1296 elf->e_ident[EI_DATA] = ELF_DATA;
1297 elf->e_ident[EI_VERSION] = EV_CURRENT;
1298 elf->e_ident[EI_OSABI] = ELF_OSABI;
1299 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1300
1301 elf->e_type = ET_CORE;
1302 elf->e_machine = ELF_ARCH;
1303 elf->e_version = EV_CURRENT;
1304 elf->e_entry = 0;
1305 elf->e_phoff = sizeof(struct elfhdr);
1306 elf->e_shoff = 0;
1307 elf->e_flags = ELF_CORE_EFLAGS;
1308 elf->e_ehsize = sizeof(struct elfhdr);
1309 elf->e_phentsize = sizeof(struct elf_phdr);
1310 elf->e_phnum = segs;
1311 elf->e_shentsize = 0;
1312 elf->e_shnum = 0;
1313 elf->e_shstrndx = 0;
1314 return;
1315}
1316
Andrew Morton8d6b5eee2006-09-25 23:32:04 -07001317static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318{
1319 phdr->p_type = PT_NOTE;
1320 phdr->p_offset = offset;
1321 phdr->p_vaddr = 0;
1322 phdr->p_paddr = 0;
1323 phdr->p_filesz = sz;
1324 phdr->p_memsz = 0;
1325 phdr->p_flags = 0;
1326 phdr->p_align = 0;
1327 return;
1328}
1329
1330static void fill_note(struct memelfnote *note, const char *name, int type,
1331 unsigned int sz, void *data)
1332{
1333 note->name = name;
1334 note->type = type;
1335 note->datasz = sz;
1336 note->data = data;
1337 return;
1338}
1339
/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	/* signal that caused the dump, plus pending/blocked masks
	 * (first word of each sigset only) */
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	/* process/parent/group/session identifiers */
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	/* reaped children's times live on the shared signal struct */
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}
1376
/*
 * Fill the NT_PRPSINFO descriptor: command-line arguments (read from
 * the dumping process's own user address space), ids, run state and
 * credentials.  Returns 0 on success or -EFAULT if the argument area
 * cannot be copied.
 */
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	/* clamp to the fixed pr_psargs buffer, leaving room for NUL */
	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	/* arguments are NUL-separated in memory; present them as one
	 * space-separated string */
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	/* pr_state indexes "RSDTZW"; anything beyond shows as '.' */
	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	/* p->comm fits pr_fname here; both are fixed-size buffers */
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}
1413
/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;		/* link in the dumper's thread list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;	/* the task this entry describes */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];	/* prstatus, fpu, and optional xfpu */
	int num_notes;			/* how many of notes[] are valid */
};
1427
/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every threads pr_status and then create
 * a single section for them in the final core file.
 *
 * Fills t->prstatus (plus FP state when available), builds the
 * corresponding entries in t->notes[], and returns the total on-file
 * size of those notes.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	/* NT_PRSTATUS note is always present */
	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	/* NT_PRFPREG note only when the arch reports valid FP state */
	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	/* extended FP registers, on arches that provide the hook */
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}
1465
Roland McGrathf47aef52007-01-26 00:56:49 -08001466static struct vm_area_struct *first_vma(struct task_struct *tsk,
1467 struct vm_area_struct *gate_vma)
1468{
1469 struct vm_area_struct *ret = tsk->mm->mmap;
1470
1471 if (ret)
1472 return ret;
1473 return gate_vma;
1474}
1475/*
1476 * Helper function for iterating across a vma list. It ensures that the caller
1477 * will visit `gate_vma' prior to terminating the search.
1478 */
1479static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1480 struct vm_area_struct *gate_vma)
1481{
1482 struct vm_area_struct *ret;
1483
1484 ret = this_vma->vm_next;
1485 if (ret)
1486 return ret;
1487 if (this_vma == gate_vma)
1488 return NULL;
1489 return gate_vma;
1490}
1491
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492/*
1493 * Actual dumper
1494 *
1495 * This is a two-pass process; first we find the offsets of the bits,
1496 * and then they are actually written out. If we run out of core limit
1497 * we just truncate.
1498 */
Neil Horman7dc0b222007-10-16 23:26:34 -07001499static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500{
1501#define NUM_NOTES 6
1502 int has_dumped = 0;
1503 mm_segment_t fs;
1504 int segs;
1505 size_t size = 0;
1506 int i;
Roland McGrathf47aef52007-01-26 00:56:49 -08001507 struct vm_area_struct *vma, *gate_vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 struct elfhdr *elf = NULL;
Andi Kleend025c9d2006-09-30 23:29:28 -07001509 loff_t offset = 0, dataoff, foffset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 int numnote;
1511 struct memelfnote *notes = NULL;
1512 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1513 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1514 struct task_struct *g, *p;
1515 LIST_HEAD(thread_list);
1516 struct list_head *t;
1517 elf_fpregset_t *fpu = NULL;
1518#ifdef ELF_CORE_COPY_XFPREGS
1519 elf_fpxregset_t *xfpu = NULL;
1520#endif
1521 int thread_status_size = 0;
1522 elf_addr_t *auxv;
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001523 unsigned long mm_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524
1525 /*
1526 * We no longer stop all VM operations.
1527 *
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001528 * This is because those proceses that could possibly change map_count
1529 * or the mmap / vma pages are now blocked in do_exit on current
1530 * finishing this core dump.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 *
1532 * Only ptrace can touch these memory addresses, but it doesn't change
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001533 * the map_count or the pages allocated. So no possibility of crashing
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 * exists while dumping the mm->vm_next areas to the core file.
1535 */
1536
1537 /* alloc memory for large data structures: too large to be on stack */
1538 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1539 if (!elf)
1540 goto cleanup;
1541 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1542 if (!prstatus)
1543 goto cleanup;
1544 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1545 if (!psinfo)
1546 goto cleanup;
1547 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1548 if (!notes)
1549 goto cleanup;
1550 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1551 if (!fpu)
1552 goto cleanup;
1553#ifdef ELF_CORE_COPY_XFPREGS
1554 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1555 if (!xfpu)
1556 goto cleanup;
1557#endif
1558
1559 if (signr) {
1560 struct elf_thread_status *tmp;
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001561 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 do_each_thread(g,p)
1563 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001564 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 if (!tmp) {
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001566 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 goto cleanup;
1568 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 tmp->thread = p;
1570 list_add(&tmp->list, &thread_list);
1571 }
1572 while_each_thread(g,p);
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001573 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 list_for_each(t, &thread_list) {
1575 struct elf_thread_status *tmp;
1576 int sz;
1577
1578 tmp = list_entry(t, struct elf_thread_status, list);
1579 sz = elf_dump_thread_status(signr, tmp);
1580 thread_status_size += sz;
1581 }
1582 }
1583 /* now collect the dump for the current */
1584 memset(prstatus, 0, sizeof(*prstatus));
1585 fill_prstatus(prstatus, current, signr);
1586 elf_core_copy_regs(&prstatus->pr_reg, regs);
1587
1588 segs = current->mm->map_count;
1589#ifdef ELF_CORE_EXTRA_PHDRS
1590 segs += ELF_CORE_EXTRA_PHDRS;
1591#endif
1592
Roland McGrathf47aef52007-01-26 00:56:49 -08001593 gate_vma = get_gate_vma(current);
1594 if (gate_vma != NULL)
1595 segs++;
1596
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 /* Set up header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001598 fill_elf_header(elf, segs + 1); /* including notes section */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599
1600 has_dumped = 1;
1601 current->flags |= PF_DUMPCORE;
1602
1603 /*
1604 * Set up the notes in similar form to SVR4 core dumps made
1605 * with info from their /proc.
1606 */
1607
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001608 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 fill_psinfo(psinfo, current->group_leader, current->mm);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001610 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611
Eric W. Biedermana9289722005-10-30 15:02:08 -08001612 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001614 auxv = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615
1616 i = 0;
1617 do
1618 i += 2;
1619 while (auxv[i - 2] != AT_NULL);
1620 fill_note(&notes[numnote++], "CORE", NT_AUXV,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001621 i * sizeof(elf_addr_t), auxv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622
1623 /* Try to dump the FPU. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001624 if ((prstatus->pr_fpvalid =
1625 elf_core_copy_task_fpregs(current, regs, fpu)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 fill_note(notes + numnote++,
1627 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1628#ifdef ELF_CORE_COPY_XFPREGS
1629 if (elf_core_copy_task_xfpregs(current, xfpu))
1630 fill_note(notes + numnote++,
Mark Nelson5b20cd82007-10-16 23:25:39 -07001631 "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632#endif
1633
1634 fs = get_fs();
1635 set_fs(KERNEL_DS);
1636
1637 DUMP_WRITE(elf, sizeof(*elf));
1638 offset += sizeof(*elf); /* Elf header */
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001639 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1640 foffset = offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641
1642 /* Write notes phdr entry */
1643 {
1644 struct elf_phdr phdr;
1645 int sz = 0;
1646
1647 for (i = 0; i < numnote; i++)
1648 sz += notesize(notes + i);
1649
1650 sz += thread_status_size;
1651
Michael Ellermane5501492007-09-19 14:38:12 +10001652 sz += elf_coredump_extra_notes_size();
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001653
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 fill_elf_note_phdr(&phdr, sz, offset);
1655 offset += sz;
1656 DUMP_WRITE(&phdr, sizeof(phdr));
1657 }
1658
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1660
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001661 /*
1662 * We must use the same mm->flags while dumping core to avoid
1663 * inconsistency between the program headers and bodies, otherwise an
1664 * unusable core file can be generated.
1665 */
1666 mm_flags = current->mm->flags;
1667
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 /* Write program headers for segments dump */
Roland McGrathf47aef52007-01-26 00:56:49 -08001669 for (vma = first_vma(current, gate_vma); vma != NULL;
1670 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 struct elf_phdr phdr;
1672 size_t sz;
1673
1674 sz = vma->vm_end - vma->vm_start;
1675
1676 phdr.p_type = PT_LOAD;
1677 phdr.p_offset = offset;
1678 phdr.p_vaddr = vma->vm_start;
1679 phdr.p_paddr = 0;
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001680 phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 phdr.p_memsz = sz;
1682 offset += phdr.p_filesz;
1683 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001684 if (vma->vm_flags & VM_WRITE)
1685 phdr.p_flags |= PF_W;
1686 if (vma->vm_flags & VM_EXEC)
1687 phdr.p_flags |= PF_X;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 phdr.p_align = ELF_EXEC_PAGESIZE;
1689
1690 DUMP_WRITE(&phdr, sizeof(phdr));
1691 }
1692
1693#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1694 ELF_CORE_WRITE_EXTRA_PHDRS;
1695#endif
1696
1697 /* write out the notes section */
1698 for (i = 0; i < numnote; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001699 if (!writenote(notes + i, file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 goto end_coredump;
1701
Michael Ellermane5501492007-09-19 14:38:12 +10001702 if (elf_coredump_extra_notes_write(file, &foffset))
1703 goto end_coredump;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001704
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 /* write out the thread status notes section */
1706 list_for_each(t, &thread_list) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001707 struct elf_thread_status *tmp =
1708 list_entry(t, struct elf_thread_status, list);
1709
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 for (i = 0; i < tmp->num_notes; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001711 if (!writenote(&tmp->notes[i], file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 goto end_coredump;
1713 }
Andi Kleend025c9d2006-09-30 23:29:28 -07001714
1715 /* Align to page */
1716 DUMP_SEEK(dataoff - foffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717
Roland McGrathf47aef52007-01-26 00:56:49 -08001718 for (vma = first_vma(current, gate_vma); vma != NULL;
1719 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 unsigned long addr;
1721
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001722 if (!maydump(vma, mm_flags))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 continue;
1724
1725 for (addr = vma->vm_start;
1726 addr < vma->vm_end;
1727 addr += PAGE_SIZE) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001728 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 struct vm_area_struct *vma;
1730
1731 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1732 &page, &vma) <= 0) {
Andi Kleend025c9d2006-09-30 23:29:28 -07001733 DUMP_SEEK(PAGE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 } else {
Nick Piggin557ed1f2007-10-16 01:24:40 -07001735 if (page == ZERO_PAGE(0)) {
Brian Pomerantz03221702007-04-01 23:49:41 -07001736 if (!dump_seek(file, PAGE_SIZE)) {
1737 page_cache_release(page);
1738 goto end_coredump;
1739 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 } else {
1741 void *kaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001742 flush_cache_page(vma, addr,
1743 page_to_pfn(page));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 kaddr = kmap(page);
1745 if ((size += PAGE_SIZE) > limit ||
1746 !dump_write(file, kaddr,
1747 PAGE_SIZE)) {
1748 kunmap(page);
1749 page_cache_release(page);
1750 goto end_coredump;
1751 }
1752 kunmap(page);
1753 }
1754 page_cache_release(page);
1755 }
1756 }
1757 }
1758
1759#ifdef ELF_CORE_WRITE_EXTRA_DATA
1760 ELF_CORE_WRITE_EXTRA_DATA;
1761#endif
1762
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763end_coredump:
1764 set_fs(fs);
1765
1766cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001767 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 struct list_head *tmp = thread_list.next;
1769 list_del(tmp);
1770 kfree(list_entry(tmp, struct elf_thread_status, list));
1771 }
1772
1773 kfree(elf);
1774 kfree(prstatus);
1775 kfree(psinfo);
1776 kfree(notes);
1777 kfree(fpu);
1778#ifdef ELF_CORE_COPY_XFPREGS
1779 kfree(xfpu);
1780#endif
1781 return has_dumped;
1782#undef NUM_NOTES
1783}
1784
1785#endif /* USE_ELF_CORE_DUMP */
1786
1787static int __init init_elf_binfmt(void)
1788{
1789 return register_binfmt(&elf_format);
1790}
1791
1792static void __exit exit_elf_binfmt(void)
1793{
1794 /* Remove the COFF and ELF loaders. */
1795 unregister_binfmt(&elf_format);
1796}
1797
/*
 * Boot-time registration and module-unload teardown of the ELF loader.
 * core_initcall runs earlier than a plain module_init — presumably so the
 * ELF handler is in place before early user space starts; confirm against
 * initcall ordering docs.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");