blob: ba24cb2ff6ceb83859f782677372ded1c69b20c2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h>
21#include <linux/signal.h>
22#include <linux/binfmts.h>
23#include <linux/string.h>
24#include <linux/file.h>
25#include <linux/fcntl.h>
26#include <linux/ptrace.h>
27#include <linux/slab.h>
28#include <linux/shm.h>
29#include <linux/personality.h>
30#include <linux/elfcore.h>
31#include <linux/init.h>
32#include <linux/highuid.h>
33#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/compiler.h>
35#include <linux/highmem.h>
36#include <linux/pagemap.h>
37#include <linux/security.h>
38#include <linux/syscalls.h>
39#include <linux/random.h>
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070040#include <linux/elf.h>
Alexey Dobriyan7e80d0d2007-05-08 00:28:59 -070041#include <linux/utsname.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <asm/uaccess.h>
43#include <asm/param.h>
44#include <asm/page.h>
45
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070046static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47static int load_elf_library(struct file *);
Jan Kratochvil60bfba72007-07-15 23:40:06 -070048static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int, unsigned long);
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050/*
51 * If we don't support core dumping, then supply a NULL so we
52 * don't even try.
53 */
Matt Mackall708e9a72006-01-08 01:05:25 -080054#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070055static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#else
57#define elf_core_dump NULL
58#endif
59
/*
 * ELF_MIN_ALIGN is the alignment granule used by the helpers below: whichever
 * of ELF_EXEC_PAGESIZE and PAGE_SIZE is larger.
 */
#if PAGE_SIZE >= ELF_EXEC_PAGESIZE
#define ELF_MIN_ALIGN	PAGE_SIZE
#else
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#endif

/* Fallback used when nothing else has defined ELF_CORE_EFLAGS. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/* Round _v down to the ELF_MIN_ALIGN boundary at or below it. */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
/* Offset of _v inside its ELF_MIN_ALIGN-sized page. */
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
/* Round _v up to the ELF_MIN_ALIGN boundary at or above it. */
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
73
74static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
78 .core_dump = elf_core_dump,
Andi Kleen9fbbd4d2007-02-13 13:26:26 +010079 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070081};
82
Jan Kratochvil60bfba72007-07-15 23:40:06 -070083#define BAD_ADDR(x) IS_ERR_VALUE(x)
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int set_brk(unsigned long start, unsigned long end)
86{
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99}
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101/* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700104 be in memory
105 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static int padzero(unsigned long elf_bss)
107{
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117}
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                          direction the stack grows.
 * STACK_ROUND(sp, items) - address 'items' slots further on from sp, rounded
 *                          to a 16-byte boundary in the growth direction.
 * STACK_ALLOC(sp, len)   - reserve 'len' on the stack by adjusting sp in
 *                          place; evaluates to the start of the reserved
 *                          area.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. STACK_ROUND(sp, a + b)) expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134static int
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
138{
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700151 struct vm_area_struct *vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
153 /*
154 * If this architecture has a platform capability string, copy it
155 * to userspace. In some cases (Sparc), this info is impossible
156 * for userspace to get any other way, in others (i386) it is
157 * merely difficult.
158 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 u_platform = NULL;
160 if (k_platform) {
161 size_t len = strlen(k_platform) + 1;
162
163 /*
164 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 * evictions by the processes running on the same package. One
166 * thing we can do is to shuffle the initial stack for them.
167 */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700168
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 p = arch_align_stack(p);
170
171 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
172 if (__copy_to_user(u_platform, k_platform, len))
173 return -EFAULT;
174 }
175
176 /* Create the ELF interpreter info */
Jesper Juhl785d5572006-06-23 02:05:35 -0700177 elf_info = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178#define NEW_AUX_ENT(id, val) \
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700179 do { \
Jesper Juhl785d5572006-06-23 02:05:35 -0700180 elf_info[ei_index++] = id; \
181 elf_info[ei_index++] = val; \
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700182 } while (0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184#ifdef ARCH_DLINFO
185 /*
186 * ARCH_DLINFO must come first so PPC can do its special alignment of
187 * AUXV.
188 */
189 ARCH_DLINFO;
190#endif
191 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
192 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
193 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
194 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700195 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
197 NEW_AUX_ENT(AT_BASE, interp_load_addr);
198 NEW_AUX_ENT(AT_FLAGS, 0);
199 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
Jesper Juhl785d5572006-06-23 02:05:35 -0700200 NEW_AUX_ENT(AT_UID, tsk->uid);
201 NEW_AUX_ENT(AT_EUID, tsk->euid);
202 NEW_AUX_ENT(AT_GID, tsk->gid);
203 NEW_AUX_ENT(AT_EGID, tsk->egid);
204 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 if (k_platform) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700206 NEW_AUX_ENT(AT_PLATFORM,
Jesper Juhl785d5572006-06-23 02:05:35 -0700207 (elf_addr_t)(unsigned long)u_platform);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 }
209 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
Jesper Juhl785d5572006-06-23 02:05:35 -0700210 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 }
212#undef NEW_AUX_ENT
213 /* AT_NULL is zero; clear the rest too */
214 memset(&elf_info[ei_index], 0,
215 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
216
217 /* And advance past the AT_NULL entry. */
218 ei_index += 2;
219
220 sp = STACK_ADD(p, ei_index);
221
222 items = (argc + 1) + (envc + 1);
223 if (interp_aout) {
224 items += 3; /* a.out interpreters require argv & envp too */
225 } else {
226 items += 1; /* ELF interpreters only put argc on the stack */
227 }
228 bprm->p = STACK_ROUND(sp, items);
229
230 /* Point sp at the lowest address on the stack */
231#ifdef CONFIG_STACK_GROWSUP
232 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700233 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234#else
235 sp = (elf_addr_t __user *)bprm->p;
236#endif
237
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700238
239 /*
240 * Grow the stack manually; some architectures have a limit on how
241 * far ahead a user-space access may be in order to grow the stack.
242 */
243 vma = find_extend_vma(current->mm, bprm->p);
244 if (!vma)
245 return -EFAULT;
246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
248 if (__put_user(argc, sp++))
249 return -EFAULT;
250 if (interp_aout) {
251 argv = sp + 2;
252 envp = argv + argc + 1;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800253 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
254 __put_user((elf_addr_t)(unsigned long)envp, sp++))
255 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 } else {
257 argv = sp;
258 envp = argv + argc + 1;
259 }
260
261 /* Populate argv and envp */
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -0700262 p = current->mm->arg_end = current->mm->arg_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 while (argc-- > 0) {
264 size_t len;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800265 if (__put_user((elf_addr_t)p, argv++))
266 return -EFAULT;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700267 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
268 if (!len || len > MAX_ARG_STRLEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 return 0;
270 p += len;
271 }
272 if (__put_user(0, argv))
273 return -EFAULT;
274 current->mm->arg_end = current->mm->env_start = p;
275 while (envc-- > 0) {
276 size_t len;
Heiko Carstens841d5fb2006-12-06 20:36:35 -0800277 if (__put_user((elf_addr_t)p, envp++))
278 return -EFAULT;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700279 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
280 if (!len || len > MAX_ARG_STRLEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 return 0;
282 p += len;
283 }
284 if (__put_user(0, envp))
285 return -EFAULT;
286 current->mm->env_end = p;
287
288 /* Put the elf_info on the stack in the right place. */
289 sp = (elf_addr_t __user *)envp + 1;
290 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
291 return -EFAULT;
292 return 0;
293}
294
295#ifndef elf_map
296
297static unsigned long elf_map(struct file *filep, unsigned long addr,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700298 struct elf_phdr *eppnt, int prot, int type,
299 unsigned long total_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300{
301 unsigned long map_addr;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700302 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
303 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
304 addr = ELF_PAGESTART(addr);
305 size = ELF_PAGEALIGN(size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306
David Gibsondda6ebd2006-01-08 01:03:35 -0800307 /* mmap() will return -EINVAL if given a zero size, but a
308 * segment with zero filesize is perfectly valid */
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700309 if (!size)
310 return addr;
311
312 down_write(&current->mm->mmap_sem);
313 /*
314 * total_size is the size of the ELF (interpreter) image.
315 * The _first_ mmap needs to know the full size, otherwise
316 * randomization might put this image into an overlapping
317 * position with the ELF binary image. (since size < total_size)
318 * So we first map the 'big' image - and unmap the remainder at
319 * the end. (which unmap is needed for ELF images with holes.)
320 */
321 if (total_size) {
322 total_size = ELF_PAGEALIGN(total_size);
323 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
324 if (!BAD_ADDR(map_addr))
325 do_munmap(current->mm, map_addr+size, total_size-size);
326 } else
327 map_addr = do_mmap(filep, addr, size, prot, type, off);
328
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 up_write(&current->mm->mmap_sem);
330 return(map_addr);
331}
332
333#endif /* !elf_map */
334
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700335static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
336{
337 int i, first_idx = -1, last_idx = -1;
338
339 for (i = 0; i < nr; i++) {
340 if (cmds[i].p_type == PT_LOAD) {
341 last_idx = i;
342 if (first_idx == -1)
343 first_idx = i;
344 }
345 }
346 if (first_idx == -1)
347 return 0;
348
349 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
350 ELF_PAGESTART(cmds[first_idx].p_vaddr);
351}
352
353
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354/* This is much more generalized than the library routine read function,
355 so we keep this separate. Technically the library read function
356 is only provided so that we can read a.out libraries that have
357 an ELF header */
358
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700359static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700360 struct file *interpreter, unsigned long *interp_map_addr,
361 unsigned long no_base)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362{
363 struct elf_phdr *elf_phdata;
364 struct elf_phdr *eppnt;
365 unsigned long load_addr = 0;
366 int load_addr_set = 0;
367 unsigned long last_bss = 0, elf_bss = 0;
368 unsigned long error = ~0UL;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700369 unsigned long total_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 int retval, i, size;
371
372 /* First of all, some simple consistency checks */
373 if (interp_elf_ex->e_type != ET_EXEC &&
374 interp_elf_ex->e_type != ET_DYN)
375 goto out;
376 if (!elf_check_arch(interp_elf_ex))
377 goto out;
378 if (!interpreter->f_op || !interpreter->f_op->mmap)
379 goto out;
380
381 /*
382 * If the size of this structure has changed, then punt, since
383 * we will be doing the wrong thing.
384 */
385 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
386 goto out;
387 if (interp_elf_ex->e_phnum < 1 ||
388 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
389 goto out;
390
391 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
393 if (size > ELF_MIN_ALIGN)
394 goto out;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700395 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 if (!elf_phdata)
397 goto out;
398
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700399 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
400 (char *)elf_phdata,size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 error = -EIO;
402 if (retval != size) {
403 if (retval < 0)
404 error = retval;
405 goto out_close;
406 }
407
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700408 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
409 if (!total_size) {
410 error = -EINVAL;
411 goto out_close;
412 }
413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 eppnt = elf_phdata;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700415 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
416 if (eppnt->p_type == PT_LOAD) {
417 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
418 int elf_prot = 0;
419 unsigned long vaddr = 0;
420 unsigned long k, map_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700422 if (eppnt->p_flags & PF_R)
423 elf_prot = PROT_READ;
424 if (eppnt->p_flags & PF_W)
425 elf_prot |= PROT_WRITE;
426 if (eppnt->p_flags & PF_X)
427 elf_prot |= PROT_EXEC;
428 vaddr = eppnt->p_vaddr;
429 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
430 elf_type |= MAP_FIXED;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700431 else if (no_base && interp_elf_ex->e_type == ET_DYN)
432 load_addr = -vaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700434 map_addr = elf_map(interpreter, load_addr + vaddr,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700435 eppnt, elf_prot, elf_type, total_size);
436 total_size = 0;
437 if (!*interp_map_addr)
438 *interp_map_addr = map_addr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700439 error = map_addr;
440 if (BAD_ADDR(map_addr))
441 goto out_close;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700443 if (!load_addr_set &&
444 interp_elf_ex->e_type == ET_DYN) {
445 load_addr = map_addr - ELF_PAGESTART(vaddr);
446 load_addr_set = 1;
447 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700449 /*
450 * Check to see if the section's size will overflow the
451 * allowed task size. Note that p_filesz must always be
452 * <= p_memsize so it's only necessary to check p_memsz.
453 */
454 k = load_addr + eppnt->p_vaddr;
Chuck Ebbertce510592006-07-03 00:24:14 -0700455 if (BAD_ADDR(k) ||
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700456 eppnt->p_filesz > eppnt->p_memsz ||
457 eppnt->p_memsz > TASK_SIZE ||
458 TASK_SIZE - eppnt->p_memsz < k) {
459 error = -ENOMEM;
460 goto out_close;
461 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700463 /*
464 * Find the end of the file mapping for this phdr, and
465 * keep track of the largest address we see for this.
466 */
467 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
468 if (k > elf_bss)
469 elf_bss = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700471 /*
472 * Do the same thing for the memory mapping - between
473 * elf_bss and last_bss is the bss section.
474 */
475 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
476 if (k > last_bss)
477 last_bss = k;
478 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 }
480
481 /*
482 * Now fill out the bss section. First pad the last page up
483 * to the page boundary, and then perform a mmap to make sure
484 * that there are zero-mapped pages up to and including the
485 * last bss page.
486 */
487 if (padzero(elf_bss)) {
488 error = -EFAULT;
489 goto out_close;
490 }
491
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700492 /* What we have mapped so far */
493 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
495 /* Map the last of the bss segment */
496 if (last_bss > elf_bss) {
497 down_write(&current->mm->mmap_sem);
498 error = do_brk(elf_bss, last_bss - elf_bss);
499 up_write(&current->mm->mmap_sem);
500 if (BAD_ADDR(error))
501 goto out_close;
502 }
503
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700504 error = load_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
506out_close:
507 kfree(elf_phdata);
508out:
509 return error;
510}
511
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700512static unsigned long load_aout_interp(struct exec *interp_ex,
513 struct file *interpreter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514{
515 unsigned long text_data, elf_entry = ~0UL;
516 char __user * addr;
517 loff_t offset;
518
519 current->mm->end_code = interp_ex->a_text;
520 text_data = interp_ex->a_text + interp_ex->a_data;
521 current->mm->end_data = text_data;
522 current->mm->brk = interp_ex->a_bss + text_data;
523
524 switch (N_MAGIC(*interp_ex)) {
525 case OMAGIC:
526 offset = 32;
527 addr = (char __user *)0;
528 break;
529 case ZMAGIC:
530 case QMAGIC:
531 offset = N_TXTOFF(*interp_ex);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700532 addr = (char __user *)N_TXTADDR(*interp_ex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 break;
534 default:
535 goto out;
536 }
537
538 down_write(&current->mm->mmap_sem);
539 do_brk(0, text_data);
540 up_write(&current->mm->mmap_sem);
541 if (!interpreter->f_op || !interpreter->f_op->read)
542 goto out;
543 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
544 goto out;
545 flush_icache_range((unsigned long)addr,
546 (unsigned long)addr + text_data);
547
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 down_write(&current->mm->mmap_sem);
549 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
550 interp_ex->a_bss);
551 up_write(&current->mm->mmap_sem);
552 elf_entry = interp_ex->a_entry;
553
554out:
555 return elf_entry;
556}
557
558/*
559 * These are the functions used to load ELF style executables and shared
560 * libraries. There is no binary dependent code anywhere else.
561 */
562
563#define INTERPRETER_NONE 0
564#define INTERPRETER_AOUT 1
565#define INTERPRETER_ELF 2
566
Andi Kleen913bd902006-03-25 16:29:09 +0100567#ifndef STACK_RND_MASK
James Bottomleyd1cabd62007-03-16 13:38:35 -0800568#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
Andi Kleen913bd902006-03-25 16:29:09 +0100569#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571static unsigned long randomize_stack_top(unsigned long stack_top)
572{
573 unsigned int random_variable = 0;
574
Andi Kleenc16b63e2006-09-26 10:52:28 +0200575 if ((current->flags & PF_RANDOMIZE) &&
576 !(current->personality & ADDR_NO_RANDOMIZE)) {
Andi Kleen913bd902006-03-25 16:29:09 +0100577 random_variable = get_random_int() & STACK_RND_MASK;
578 random_variable <<= PAGE_SHIFT;
579 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100581 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582#else
Andi Kleen913bd902006-03-25 16:29:09 +0100583 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584#endif
585}
586
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700587static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588{
589 struct file *interpreter = NULL; /* to shut gcc up */
590 unsigned long load_addr = 0, load_bias = 0;
591 int load_addr_set = 0;
592 char * elf_interpreter = NULL;
593 unsigned int interpreter_type = INTERPRETER_NONE;
594 unsigned char ibcs2_interpreter = 0;
595 unsigned long error;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700596 struct elf_phdr *elf_ppnt, *elf_phdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 unsigned long elf_bss, elf_brk;
598 int elf_exec_fileno;
599 int retval, i;
600 unsigned int size;
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700601 unsigned long elf_entry;
602 unsigned long interp_load_addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 unsigned long start_code, end_code, start_data, end_data;
604 unsigned long reloc_func_desc = 0;
605 char passed_fileno[6];
606 struct files_struct *files;
David Rientjes8de61e62006-12-06 20:40:16 -0800607 int executable_stack = EXSTACK_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 unsigned long def_flags = 0;
609 struct {
610 struct elfhdr elf_ex;
611 struct elfhdr interp_elf_ex;
612 struct exec interp_ex;
613 } *loc;
614
615 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
616 if (!loc) {
617 retval = -ENOMEM;
618 goto out_ret;
619 }
620
621 /* Get the exec-header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700622 loc->elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
624 retval = -ENOEXEC;
625 /* First of all, some simple consistency checks */
626 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
627 goto out;
628
629 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
630 goto out;
631 if (!elf_check_arch(&loc->elf_ex))
632 goto out;
633 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
634 goto out;
635
636 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
638 goto out;
639 if (loc->elf_ex.e_phnum < 1 ||
640 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
641 goto out;
642 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
643 retval = -ENOMEM;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700644 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 if (!elf_phdata)
646 goto out;
647
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700648 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
649 (char *)elf_phdata, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650 if (retval != size) {
651 if (retval >= 0)
652 retval = -EIO;
653 goto out_free_ph;
654 }
655
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700656 files = current->files; /* Refcounted so ok */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 retval = unshare_files();
658 if (retval < 0)
659 goto out_free_ph;
660 if (files == current->files) {
661 put_files_struct(files);
662 files = NULL;
663 }
664
665 /* exec will make our files private anyway, but for the a.out
666 loader stuff we need to do it earlier */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 retval = get_unused_fd();
668 if (retval < 0)
669 goto out_free_fh;
670 get_file(bprm->file);
671 fd_install(elf_exec_fileno = retval, bprm->file);
672
673 elf_ppnt = elf_phdata;
674 elf_bss = 0;
675 elf_brk = 0;
676
677 start_code = ~0UL;
678 end_code = 0;
679 start_data = 0;
680 end_data = 0;
681
682 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
683 if (elf_ppnt->p_type == PT_INTERP) {
684 /* This is the program interpreter used for
685 * shared libraries - for now assume that this
686 * is an a.out format binary
687 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 retval = -ENOEXEC;
689 if (elf_ppnt->p_filesz > PATH_MAX ||
690 elf_ppnt->p_filesz < 2)
691 goto out_free_file;
692
693 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800694 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700695 GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 if (!elf_interpreter)
697 goto out_free_file;
698
699 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700700 elf_interpreter,
701 elf_ppnt->p_filesz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 if (retval != elf_ppnt->p_filesz) {
703 if (retval >= 0)
704 retval = -EIO;
705 goto out_free_interp;
706 }
707 /* make sure path is NULL terminated */
708 retval = -ENOEXEC;
709 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
710 goto out_free_interp;
711
712 /* If the program interpreter is one of these two,
713 * then assume an iBCS2 image. Otherwise assume
714 * a native linux image.
715 */
716 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
717 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
718 ibcs2_interpreter = 1;
719
720 /*
721 * The early SET_PERSONALITY here is so that the lookup
722 * for the interpreter happens in the namespace of the
723 * to-be-execed image. SET_PERSONALITY can select an
724 * alternate root.
725 *
726 * However, SET_PERSONALITY is NOT allowed to switch
727 * this task into the new images's memory mapping
728 * policy - that is, TASK_SIZE must still evaluate to
729 * that which is appropriate to the execing application.
730 * This is because exit_mmap() needs to have TASK_SIZE
731 * evaluate to the size of the old image.
732 *
733 * So if (say) a 64-bit application is execing a 32-bit
734 * application it is the architecture's responsibility
735 * to defer changing the value of TASK_SIZE until the
736 * switch really is going to happen - do this in
737 * flush_thread(). - akpm
738 */
739 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
740
741 interpreter = open_exec(elf_interpreter);
742 retval = PTR_ERR(interpreter);
743 if (IS_ERR(interpreter))
744 goto out_free_interp;
Alexey Dobriyan1fb84492007-01-26 00:57:16 -0800745
746 /*
747 * If the binary is not readable then enforce
748 * mm->dumpable = 0 regardless of the interpreter's
749 * permissions.
750 */
751 if (file_permission(interpreter, MAY_READ) < 0)
752 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
753
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700754 retval = kernel_read(interpreter, 0, bprm->buf,
755 BINPRM_BUF_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 if (retval != BINPRM_BUF_SIZE) {
757 if (retval >= 0)
758 retval = -EIO;
759 goto out_free_dentry;
760 }
761
762 /* Get the exec headers */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700763 loc->interp_ex = *((struct exec *)bprm->buf);
764 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 break;
766 }
767 elf_ppnt++;
768 }
769
770 elf_ppnt = elf_phdata;
771 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
772 if (elf_ppnt->p_type == PT_GNU_STACK) {
773 if (elf_ppnt->p_flags & PF_X)
774 executable_stack = EXSTACK_ENABLE_X;
775 else
776 executable_stack = EXSTACK_DISABLE_X;
777 break;
778 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
780 /* Some simple consistency checks for the interpreter */
781 if (elf_interpreter) {
782 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
783
784 /* Now figure out which format our binary is */
785 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
786 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
787 (N_MAGIC(loc->interp_ex) != QMAGIC))
788 interpreter_type = INTERPRETER_ELF;
789
790 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
791 interpreter_type &= ~INTERPRETER_ELF;
792
793 retval = -ELIBBAD;
794 if (!interpreter_type)
795 goto out_free_dentry;
796
797 /* Make sure only one type was selected */
798 if ((interpreter_type & INTERPRETER_ELF) &&
799 interpreter_type != INTERPRETER_ELF) {
800 // FIXME - ratelimit this before re-enabling
801 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
802 interpreter_type = INTERPRETER_ELF;
803 }
804 /* Verify the interpreter has a valid arch */
805 if ((interpreter_type == INTERPRETER_ELF) &&
806 !elf_check_arch(&loc->interp_elf_ex))
807 goto out_free_dentry;
808 } else {
809 /* Executables without an interpreter also need a personality */
810 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
811 }
812
813 /* OK, we are done with that, now set up the arg stuff,
814 and then start this sucker up */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
816 char *passed_p = passed_fileno;
817 sprintf(passed_fileno, "%d", elf_exec_fileno);
818
819 if (elf_interpreter) {
820 retval = copy_strings_kernel(1, &passed_p, bprm);
821 if (retval)
822 goto out_free_dentry;
823 bprm->argc++;
824 }
825 }
826
827 /* Flush all traces of the currently running executable */
828 retval = flush_old_exec(bprm);
829 if (retval)
830 goto out_free_dentry;
831
832 /* Discard our unneeded old files struct */
833 if (files) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 put_files_struct(files);
835 files = NULL;
836 }
837
838 /* OK, This is the point of no return */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 current->flags &= ~PF_FORKNOEXEC;
840 current->mm->def_flags = def_flags;
841
842 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
843 may depend on the personality. */
844 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
845 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
846 current->personality |= READ_IMPLIES_EXEC;
847
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700848 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 current->flags |= PF_RANDOMIZE;
850 arch_pick_mmap_layout(current->mm);
851
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700855 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
857 executable_stack);
858 if (retval < 0) {
859 send_sig(SIGKILL, current, 0);
860 goto out_free_dentry;
861 }
862
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 current->mm->start_stack = bprm->p;
864
865 /* Now we do a little grungy work by mmaping the ELF image into
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700866 the correct location in memory. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700867 for(i = 0, elf_ppnt = elf_phdata;
868 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 int elf_prot = 0, elf_flags;
870 unsigned long k, vaddr;
871
872 if (elf_ppnt->p_type != PT_LOAD)
873 continue;
874
875 if (unlikely (elf_brk > elf_bss)) {
876 unsigned long nbyte;
877
878 /* There was a PT_LOAD segment with p_memsz > p_filesz
879 before this one. Map anonymous pages, if needed,
880 and clear the area. */
881 retval = set_brk (elf_bss + load_bias,
882 elf_brk + load_bias);
883 if (retval) {
884 send_sig(SIGKILL, current, 0);
885 goto out_free_dentry;
886 }
887 nbyte = ELF_PAGEOFFSET(elf_bss);
888 if (nbyte) {
889 nbyte = ELF_MIN_ALIGN - nbyte;
890 if (nbyte > elf_brk - elf_bss)
891 nbyte = elf_brk - elf_bss;
892 if (clear_user((void __user *)elf_bss +
893 load_bias, nbyte)) {
894 /*
895 * This bss-zeroing can fail if the ELF
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700896 * file specifies odd protections. So
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 * we don't check the return value
898 */
899 }
900 }
901 }
902
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700903 if (elf_ppnt->p_flags & PF_R)
904 elf_prot |= PROT_READ;
905 if (elf_ppnt->p_flags & PF_W)
906 elf_prot |= PROT_WRITE;
907 if (elf_ppnt->p_flags & PF_X)
908 elf_prot |= PROT_EXEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700910 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
912 vaddr = elf_ppnt->p_vaddr;
913 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
914 elf_flags |= MAP_FIXED;
915 } else if (loc->elf_ex.e_type == ET_DYN) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700916 /* Try and get dynamic programs out of the way of the
917 * default mmap base, as well as whatever program they
918 * might try to exec. This is because the brk will
919 * follow the loader, and is not movable. */
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700920#ifdef CONFIG_X86
921 load_bias = 0;
922#else
Linus Torvalds90cb28e2007-01-06 13:28:21 -0800923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700924#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 }
926
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
Jan Kratochvil60bfba72007-07-15 23:40:06 -0700928 elf_prot, elf_flags,0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 if (BAD_ADDR(error)) {
930 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700931 retval = IS_ERR((void *)error) ?
932 PTR_ERR((void*)error) : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 goto out_free_dentry;
934 }
935
936 if (!load_addr_set) {
937 load_addr_set = 1;
938 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
939 if (loc->elf_ex.e_type == ET_DYN) {
940 load_bias += error -
941 ELF_PAGESTART(load_bias + vaddr);
942 load_addr += load_bias;
943 reloc_func_desc = load_bias;
944 }
945 }
946 k = elf_ppnt->p_vaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700947 if (k < start_code)
948 start_code = k;
949 if (start_data < k)
950 start_data = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
952 /*
953 * Check to see if the section's size will overflow the
954 * allowed task size. Note that p_filesz must always be
955 * <= p_memsz so it is only necessary to check p_memsz.
956 */
Chuck Ebbertce510592006-07-03 00:24:14 -0700957 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 elf_ppnt->p_memsz > TASK_SIZE ||
959 TASK_SIZE - elf_ppnt->p_memsz < k) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700960 /* set_brk can never work. Avoid overflows. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700962 retval = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 goto out_free_dentry;
964 }
965
966 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
967
968 if (k > elf_bss)
969 elf_bss = k;
970 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 end_code = k;
972 if (end_data < k)
973 end_data = k;
974 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
975 if (k > elf_brk)
976 elf_brk = k;
977 }
978
979 loc->elf_ex.e_entry += load_bias;
980 elf_bss += load_bias;
981 elf_brk += load_bias;
982 start_code += load_bias;
983 end_code += load_bias;
984 start_data += load_bias;
985 end_data += load_bias;
986
987 /* Calling set_brk effectively mmaps the pages that we need
988 * for the bss and break sections. We must do this before
989 * mapping in the interpreter, to make sure it doesn't wind
990 * up getting placed where the bss needs to go.
991 */
992 retval = set_brk(elf_bss, elf_brk);
993 if (retval) {
994 send_sig(SIGKILL, current, 0);
995 goto out_free_dentry;
996 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700997 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 send_sig(SIGSEGV, current, 0);
999 retval = -EFAULT; /* Nobody gets to see this, but.. */
1000 goto out_free_dentry;
1001 }
1002
1003 if (elf_interpreter) {
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001004 if (interpreter_type == INTERPRETER_AOUT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 elf_entry = load_aout_interp(&loc->interp_ex,
1006 interpreter);
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001007 } else {
Andrew Morton4d3b5732007-07-15 23:41:03 -07001008 unsigned long uninitialized_var(interp_map_addr);
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 interpreter,
Jan Kratochvil60bfba72007-07-15 23:40:06 -07001012 &interp_map_addr,
1013 load_bias);
1014 if (!BAD_ADDR(elf_entry)) {
1015 /*
1016 * load_elf_interp() returns relocation
1017 * adjustment
1018 */
1019 interp_load_addr = elf_entry;
1020 elf_entry += loc->interp_elf_ex.e_entry;
1021 }
1022 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 if (BAD_ADDR(elf_entry)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 force_sig(SIGSEGV, current);
Chuck Ebbertce510592006-07-03 00:24:14 -07001025 retval = IS_ERR((void *)elf_entry) ?
1026 (int)elf_entry : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 goto out_free_dentry;
1028 }
1029 reloc_func_desc = interp_load_addr;
1030
1031 allow_write_access(interpreter);
1032 fput(interpreter);
1033 kfree(elf_interpreter);
1034 } else {
1035 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +01001036 if (BAD_ADDR(elf_entry)) {
Chuck Ebbertce510592006-07-03 00:24:14 -07001037 force_sig(SIGSEGV, current);
1038 retval = -EINVAL;
Suresh Siddha5342fba2006-02-26 04:18:28 +01001039 goto out_free_dentry;
1040 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
1042
1043 kfree(elf_phdata);
1044
1045 if (interpreter_type != INTERPRETER_AOUT)
1046 sys_close(elf_exec_fileno);
1047
1048 set_binfmt(&elf_format);
1049
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -07001050#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1051 retval = arch_setup_additional_pages(bprm, executable_stack);
1052 if (retval < 0) {
1053 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baf2005-04-28 15:17:19 -07001054 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -07001055 }
1056#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1057
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 compute_creds(bprm);
1059 current->flags &= ~PF_FORKNOEXEC;
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001060 retval = create_elf_tables(bprm, &loc->elf_ex,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001061 (interpreter_type == INTERPRETER_AOUT),
1062 load_addr, interp_load_addr);
Ollie Wildb6a2fea2007-07-19 01:48:16 -07001063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1066 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 /* N.B. passed_fileno might not be initialized? */
1068 if (interpreter_type == INTERPRETER_AOUT)
1069 current->mm->arg_start += strlen(passed_fileno) + 1;
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1075
1076 if (current->personality & MMAP_PAGE_ZERO) {
1077 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1078 and some applications "depend" upon this behavior.
1079 Since we do not have the power to recompile these, we
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001080 emulate the SVr4 behavior. Sigh. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 down_write(&current->mm->mmap_sem);
1082 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1083 MAP_FIXED | MAP_PRIVATE, 0);
1084 up_write(&current->mm->mmap_sem);
1085 }
1086
1087#ifdef ELF_PLAT_INIT
1088 /*
1089 * The ABI may specify that certain registers be set up in special
1090 * ways (on i386 %edx is the address of a DT_FINI function, for
1091 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1092 * that the e_entry field is the address of the function descriptor
1093 * for the startup routine, rather than the address of the startup
1094 * routine itself. This macro performs whatever initialization to
1095 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
1097 */
1098 ELF_PLAT_INIT(regs, reloc_func_desc);
1099#endif
1100
1101 start_thread(regs, elf_entry, bprm->p);
1102 if (unlikely(current->ptrace & PT_PTRACED)) {
1103 if (current->ptrace & PT_TRACE_EXEC)
1104 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1105 else
1106 send_sig(SIGTRAP, current, 0);
1107 }
1108 retval = 0;
1109out:
1110 kfree(loc);
1111out_ret:
1112 return retval;
1113
1114 /* error cleanup */
1115out_free_dentry:
1116 allow_write_access(interpreter);
1117 if (interpreter)
1118 fput(interpreter);
1119out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001120 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121out_free_file:
1122 sys_close(elf_exec_fileno);
1123out_free_fh:
Kirill Korotaev3b9b8ab2006-09-29 02:00:05 -07001124 if (files)
1125 reset_files_struct(current, files);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126out_free_ph:
1127 kfree(elf_phdata);
1128 goto out;
1129}
1130
1131/* This is really simpleminded and specialized - we are loading an
1132 a.out library that is given an ELF header. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133static int load_elf_library(struct file *file)
1134{
1135 struct elf_phdr *elf_phdata;
1136 struct elf_phdr *eppnt;
1137 unsigned long elf_bss, bss, len;
1138 int retval, error, i, j;
1139 struct elfhdr elf_ex;
1140
1141 error = -ENOEXEC;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001142 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 if (retval != sizeof(elf_ex))
1144 goto out;
1145
1146 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1147 goto out;
1148
1149 /* First of all, some simple consistency checks */
1150 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001151 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 goto out;
1153
1154 /* Now read in all of the header information */
1155
1156 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1157 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1158
1159 error = -ENOMEM;
1160 elf_phdata = kmalloc(j, GFP_KERNEL);
1161 if (!elf_phdata)
1162 goto out;
1163
1164 eppnt = elf_phdata;
1165 error = -ENOEXEC;
1166 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1167 if (retval != j)
1168 goto out_free_ph;
1169
1170 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1171 if ((eppnt + i)->p_type == PT_LOAD)
1172 j++;
1173 if (j != 1)
1174 goto out_free_ph;
1175
1176 while (eppnt->p_type != PT_LOAD)
1177 eppnt++;
1178
1179 /* Now use mmap to map the library into memory. */
1180 down_write(&current->mm->mmap_sem);
1181 error = do_mmap(file,
1182 ELF_PAGESTART(eppnt->p_vaddr),
1183 (eppnt->p_filesz +
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1185 PROT_READ | PROT_WRITE | PROT_EXEC,
1186 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1187 (eppnt->p_offset -
1188 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1189 up_write(&current->mm->mmap_sem);
1190 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1191 goto out_free_ph;
1192
1193 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1194 if (padzero(elf_bss)) {
1195 error = -EFAULT;
1196 goto out_free_ph;
1197 }
1198
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001199 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1200 ELF_MIN_ALIGN - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201 bss = eppnt->p_memsz + eppnt->p_vaddr;
1202 if (bss > len) {
1203 down_write(&current->mm->mmap_sem);
1204 do_brk(len, bss - len);
1205 up_write(&current->mm->mmap_sem);
1206 }
1207 error = 0;
1208
1209out_free_ph:
1210 kfree(elf_phdata);
1211out:
1212 return error;
1213}
1214
1215/*
1216 * Note that some platforms still use traditional core dumps and not
1217 * the ELF core dump. Each platform can select it as appropriate.
1218 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001219#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220
1221/*
1222 * ELF core dumper
1223 *
1224 * Modelled on fs/exec.c:aout_core_dump()
1225 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1226 */
1227/*
1228 * These are the only things you should do on a core-file: use only these
1229 * functions to write out all the necessary info.
1230 */
1231static int dump_write(struct file *file, const void *addr, int nr)
1232{
1233 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1234}
1235
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001236static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237{
Andi Kleend025c9d2006-09-30 23:29:28 -07001238 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
Petr Vandrovec7f14daa2006-10-13 04:13:16 +02001239 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 return 0;
Andi Kleend025c9d2006-09-30 23:29:28 -07001241 } else {
1242 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1243 if (!buf)
1244 return 0;
1245 while (off > 0) {
1246 unsigned long n = off;
1247 if (n > PAGE_SIZE)
1248 n = PAGE_SIZE;
1249 if (!dump_write(file, buf, n))
1250 return 0;
1251 off -= n;
1252 }
1253 free_page((unsigned long)buf);
1254 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 return 1;
1256}
1257
1258/*
1259 * Decide whether a segment is worth dumping; default is yes to be
1260 * sure (missing info is worse than too much; etc).
1261 * Personally I'd include everything, and use the coredump limit...
1262 *
1263 * I think we should skip something. But I am not sure how. H.J.
1264 */
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001265static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266{
Roland McGrathe5b97dd2007-01-26 00:56:48 -08001267 /* The vma can be set up to tell us the answer directly. */
1268 if (vma->vm_flags & VM_ALWAYSDUMP)
1269 return 1;
1270
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 /* Do not dump I/O mapped devices or special mappings */
1272 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1273 return 0;
1274
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001275 /* By default, dump shared memory if mapped from an anonymous file. */
1276 if (vma->vm_flags & VM_SHARED) {
1277 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0)
1278 return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1279 else
1280 return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
1281 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001283 /* By default, if it hasn't been written to, don't write it out. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 if (!vma->anon_vma)
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001285 return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001287 return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288}
1289
/*
 * An ELF note held in memory, prior to being sized and written out to
 * the core file.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, e.g. NT_PRSTATUS */
	unsigned int datasz;	/* size of the payload in bytes */
	void *data;		/* payload */
};
1298
1299static int notesize(struct memelfnote *en)
1300{
1301 int sz;
1302
1303 sz = sizeof(struct elf_note);
1304 sz += roundup(strlen(en->name) + 1, 4);
1305 sz += roundup(en->datasz, 4);
1306
1307 return sz;
1308}
1309
Andi Kleend025c9d2006-09-30 23:29:28 -07001310#define DUMP_WRITE(addr, nr, foffset) \
1311 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
Andi Kleend025c9d2006-09-30 23:29:28 -07001313static int alignfile(struct file *file, loff_t *foffset)
1314{
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001315 static const char buf[4] = { 0, };
Andi Kleend025c9d2006-09-30 23:29:28 -07001316 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1317 return 1;
1318}
1319
1320static int writenote(struct memelfnote *men, struct file *file,
1321 loff_t *foffset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322{
1323 struct elf_note en;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 en.n_namesz = strlen(men->name) + 1;
1325 en.n_descsz = men->datasz;
1326 en.n_type = men->type;
1327
Andi Kleend025c9d2006-09-30 23:29:28 -07001328 DUMP_WRITE(&en, sizeof(en), foffset);
1329 DUMP_WRITE(men->name, en.n_namesz, foffset);
1330 if (!alignfile(file, foffset))
1331 return 0;
1332 DUMP_WRITE(men->data, men->datasz, foffset);
1333 if (!alignfile(file, foffset))
1334 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335
1336 return 1;
1337}
1338#undef DUMP_WRITE
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
/*
 * Core-dump helpers for use inside a function that has 'file', 'size'
 * and 'limit' in scope and an 'end_coredump' label.
 *
 * DUMP_WRITE adds nr to the running size and jumps to end_coredump if
 * that exceeds the limit or the write fails; DUMP_SEEK jumps there if
 * the seek fails.
 *
 * Both are wrapped in do { } while (0) so that each use is a single
 * statement and can safely sit in an unbraced if/else.  Every use must
 * be followed by a semicolon.
 */
#define DUMP_WRITE(addr, nr)						\
	do {								\
		if ((size += (nr)) > limit ||				\
		    !dump_write(file, (addr), (nr)))			\
			goto end_coredump;				\
	} while (0)
#define DUMP_SEEK(off)							\
	do {								\
		if (!dump_seek(file, (off)))				\
			goto end_coredump;				\
	} while (0)
1346
Arjan van de Ven858119e2006-01-14 13:20:43 -08001347static void fill_elf_header(struct elfhdr *elf, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348{
1349 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1350 elf->e_ident[EI_CLASS] = ELF_CLASS;
1351 elf->e_ident[EI_DATA] = ELF_DATA;
1352 elf->e_ident[EI_VERSION] = EV_CURRENT;
1353 elf->e_ident[EI_OSABI] = ELF_OSABI;
1354 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1355
1356 elf->e_type = ET_CORE;
1357 elf->e_machine = ELF_ARCH;
1358 elf->e_version = EV_CURRENT;
1359 elf->e_entry = 0;
1360 elf->e_phoff = sizeof(struct elfhdr);
1361 elf->e_shoff = 0;
1362 elf->e_flags = ELF_CORE_EFLAGS;
1363 elf->e_ehsize = sizeof(struct elfhdr);
1364 elf->e_phentsize = sizeof(struct elf_phdr);
1365 elf->e_phnum = segs;
1366 elf->e_shentsize = 0;
1367 elf->e_shnum = 0;
1368 elf->e_shstrndx = 0;
1369 return;
1370}
1371
Andrew Morton8d6b5eee2006-09-25 23:32:04 -07001372static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373{
1374 phdr->p_type = PT_NOTE;
1375 phdr->p_offset = offset;
1376 phdr->p_vaddr = 0;
1377 phdr->p_paddr = 0;
1378 phdr->p_filesz = sz;
1379 phdr->p_memsz = 0;
1380 phdr->p_flags = 0;
1381 phdr->p_align = 0;
1382 return;
1383}
1384
1385static void fill_note(struct memelfnote *note, const char *name, int type,
1386 unsigned int sz, void *data)
1387{
1388 note->name = name;
1389 note->type = type;
1390 note->datasz = sz;
1391 note->data = data;
1392 return;
1393}
1394
1395/*
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001396 * fill up all the fields in prstatus from the given task struct, except
1397 * registers which need to be filled up separately.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 */
1399static void fill_prstatus(struct elf_prstatus *prstatus,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001400 struct task_struct *p, long signr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401{
1402 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1403 prstatus->pr_sigpend = p->pending.signal.sig[0];
1404 prstatus->pr_sighold = p->blocked.sig[0];
1405 prstatus->pr_pid = p->pid;
1406 prstatus->pr_ppid = p->parent->pid;
1407 prstatus->pr_pgrp = process_group(p);
Cedric Le Goater937949d2006-12-08 02:37:54 -08001408 prstatus->pr_sid = process_session(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 if (thread_group_leader(p)) {
1410 /*
1411 * This is the record for the group leader. Add in the
1412 * cumulative times of previous dead threads. This total
1413 * won't include the time of each live thread whose state
1414 * is included in the core dump. The final total reported
1415 * to our parent process when it calls wait4 will include
1416 * those sums as well as the little bit more time it takes
1417 * this and each other thread to finish dying after the
1418 * core dump synchronization phase.
1419 */
1420 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1421 &prstatus->pr_utime);
1422 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1423 &prstatus->pr_stime);
1424 } else {
1425 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1426 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1427 }
1428 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1429 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1430}
1431
1432static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1433 struct mm_struct *mm)
1434{
Greg Kroah-Hartmana84a5052005-05-11 00:10:44 -07001435 unsigned int i, len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
1437 /* first copy the parameters from user space */
1438 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1439
1440 len = mm->arg_end - mm->arg_start;
1441 if (len >= ELF_PRARGSZ)
1442 len = ELF_PRARGSZ-1;
1443 if (copy_from_user(&psinfo->pr_psargs,
1444 (const char __user *)mm->arg_start, len))
1445 return -EFAULT;
1446 for(i = 0; i < len; i++)
1447 if (psinfo->pr_psargs[i] == 0)
1448 psinfo->pr_psargs[i] = ' ';
1449 psinfo->pr_psargs[len] = 0;
1450
1451 psinfo->pr_pid = p->pid;
1452 psinfo->pr_ppid = p->parent->pid;
1453 psinfo->pr_pgrp = process_group(p);
Cedric Le Goater937949d2006-12-08 02:37:54 -08001454 psinfo->pr_sid = process_session(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455
1456 i = p->state ? ffz(~p->state) + 1 : 0;
1457 psinfo->pr_state = i;
Carsten Otte55148542006-03-25 03:08:22 -08001458 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1460 psinfo->pr_nice = task_nice(p);
1461 psinfo->pr_flag = p->flags;
1462 SET_UID(psinfo->pr_uid, p->uid);
1463 SET_GID(psinfo->pr_gid, p->gid);
1464 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1465
1466 return 0;
1467}
1468
1469/* Here is the structure in which status of each thread is captured. */
1470struct elf_thread_status
1471{
1472 struct list_head list;
1473 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1474 elf_fpregset_t fpu; /* NT_PRFPREG */
1475 struct task_struct *thread;
1476#ifdef ELF_CORE_COPY_XFPREGS
1477 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1478#endif
1479 struct memelfnote notes[3];
1480 int num_notes;
1481};
1482
1483/*
1484 * In order to add the specific thread information for the elf file format,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001485 * we need to keep a linked list of every threads pr_status and then create
1486 * a single section for them in the final core file.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 */
1488static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1489{
1490 int sz = 0;
1491 struct task_struct *p = t->thread;
1492 t->num_notes = 0;
1493
1494 fill_prstatus(&t->prstatus, p, signr);
1495 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1496
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001497 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1498 &(t->prstatus));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 t->num_notes++;
1500 sz += notesize(&t->notes[0]);
1501
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001502 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1503 &t->fpu))) {
1504 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1505 &(t->fpu));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 t->num_notes++;
1507 sz += notesize(&t->notes[1]);
1508 }
1509
1510#ifdef ELF_CORE_COPY_XFPREGS
1511 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001512 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1513 &t->xfpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 t->num_notes++;
1515 sz += notesize(&t->notes[2]);
1516 }
1517#endif
1518 return sz;
1519}
1520
Roland McGrathf47aef52007-01-26 00:56:49 -08001521static struct vm_area_struct *first_vma(struct task_struct *tsk,
1522 struct vm_area_struct *gate_vma)
1523{
1524 struct vm_area_struct *ret = tsk->mm->mmap;
1525
1526 if (ret)
1527 return ret;
1528 return gate_vma;
1529}
1530/*
1531 * Helper function for iterating across a vma list. It ensures that the caller
1532 * will visit `gate_vma' prior to terminating the search.
1533 */
1534static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1535 struct vm_area_struct *gate_vma)
1536{
1537 struct vm_area_struct *ret;
1538
1539 ret = this_vma->vm_next;
1540 if (ret)
1541 return ret;
1542 if (this_vma == gate_vma)
1543 return NULL;
1544 return gate_vma;
1545}
1546
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547/*
1548 * Actual dumper
1549 *
1550 * This is a two-pass process; first we find the offsets of the bits,
1551 * and then they are actually written out. If we run out of core limit
1552 * we just truncate.
1553 */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001554static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555{
1556#define NUM_NOTES 6
1557 int has_dumped = 0;
1558 mm_segment_t fs;
1559 int segs;
1560 size_t size = 0;
1561 int i;
Roland McGrathf47aef52007-01-26 00:56:49 -08001562 struct vm_area_struct *vma, *gate_vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 struct elfhdr *elf = NULL;
Andi Kleend025c9d2006-09-30 23:29:28 -07001564 loff_t offset = 0, dataoff, foffset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1566 int numnote;
1567 struct memelfnote *notes = NULL;
1568 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1569 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1570 struct task_struct *g, *p;
1571 LIST_HEAD(thread_list);
1572 struct list_head *t;
1573 elf_fpregset_t *fpu = NULL;
1574#ifdef ELF_CORE_COPY_XFPREGS
1575 elf_fpxregset_t *xfpu = NULL;
1576#endif
1577 int thread_status_size = 0;
1578 elf_addr_t *auxv;
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001579 unsigned long mm_flags;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001580#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1581 int extra_notes_size;
1582#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583
1584 /*
1585 * We no longer stop all VM operations.
1586 *
	 * This is because those processes that could possibly change map_count
1588 * or the mmap / vma pages are now blocked in do_exit on current
1589 * finishing this core dump.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 *
1591 * Only ptrace can touch these memory addresses, but it doesn't change
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001592 * the map_count or the pages allocated. So no possibility of crashing
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 * exists while dumping the mm->vm_next areas to the core file.
1594 */
1595
1596 /* alloc memory for large data structures: too large to be on stack */
1597 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1598 if (!elf)
1599 goto cleanup;
1600 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1601 if (!prstatus)
1602 goto cleanup;
1603 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1604 if (!psinfo)
1605 goto cleanup;
1606 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1607 if (!notes)
1608 goto cleanup;
1609 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1610 if (!fpu)
1611 goto cleanup;
1612#ifdef ELF_CORE_COPY_XFPREGS
1613 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1614 if (!xfpu)
1615 goto cleanup;
1616#endif
1617
1618 if (signr) {
1619 struct elf_thread_status *tmp;
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001620 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 do_each_thread(g,p)
1622 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001623 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 if (!tmp) {
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001625 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 goto cleanup;
1627 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 tmp->thread = p;
1629 list_add(&tmp->list, &thread_list);
1630 }
1631 while_each_thread(g,p);
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001632 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 list_for_each(t, &thread_list) {
1634 struct elf_thread_status *tmp;
1635 int sz;
1636
1637 tmp = list_entry(t, struct elf_thread_status, list);
1638 sz = elf_dump_thread_status(signr, tmp);
1639 thread_status_size += sz;
1640 }
1641 }
1642 /* now collect the dump for the current */
1643 memset(prstatus, 0, sizeof(*prstatus));
1644 fill_prstatus(prstatus, current, signr);
1645 elf_core_copy_regs(&prstatus->pr_reg, regs);
1646
1647 segs = current->mm->map_count;
1648#ifdef ELF_CORE_EXTRA_PHDRS
1649 segs += ELF_CORE_EXTRA_PHDRS;
1650#endif
1651
Roland McGrathf47aef52007-01-26 00:56:49 -08001652 gate_vma = get_gate_vma(current);
1653 if (gate_vma != NULL)
1654 segs++;
1655
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 /* Set up header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001657 fill_elf_header(elf, segs + 1); /* including notes section */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
1659 has_dumped = 1;
1660 current->flags |= PF_DUMPCORE;
1661
1662 /*
1663 * Set up the notes in similar form to SVR4 core dumps made
1664 * with info from their /proc.
1665 */
1666
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001667 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 fill_psinfo(psinfo, current->group_leader, current->mm);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001669 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Eric W. Biedermana9289722005-10-30 15:02:08 -08001671 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001673 auxv = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
1675 i = 0;
1676 do
1677 i += 2;
1678 while (auxv[i - 2] != AT_NULL);
1679 fill_note(&notes[numnote++], "CORE", NT_AUXV,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001680 i * sizeof(elf_addr_t), auxv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681
1682 /* Try to dump the FPU. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001683 if ((prstatus->pr_fpvalid =
1684 elf_core_copy_task_fpregs(current, regs, fpu)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 fill_note(notes + numnote++,
1686 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1687#ifdef ELF_CORE_COPY_XFPREGS
1688 if (elf_core_copy_task_xfpregs(current, xfpu))
1689 fill_note(notes + numnote++,
1690 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1691#endif
1692
1693 fs = get_fs();
1694 set_fs(KERNEL_DS);
1695
1696 DUMP_WRITE(elf, sizeof(*elf));
1697 offset += sizeof(*elf); /* Elf header */
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001698 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1699 foffset = offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700
1701 /* Write notes phdr entry */
1702 {
1703 struct elf_phdr phdr;
1704 int sz = 0;
1705
1706 for (i = 0; i < numnote; i++)
1707 sz += notesize(notes + i);
1708
1709 sz += thread_status_size;
1710
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001711#ifdef ELF_CORE_WRITE_EXTRA_NOTES
Michael Ellermanef7320e2007-07-06 02:39:49 -07001712 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1713 sz += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001714#endif
1715
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716 fill_elf_note_phdr(&phdr, sz, offset);
1717 offset += sz;
1718 DUMP_WRITE(&phdr, sizeof(phdr));
1719 }
1720
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1722
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001723 /*
1724 * We must use the same mm->flags while dumping core to avoid
1725 * inconsistency between the program headers and bodies, otherwise an
1726 * unusable core file can be generated.
1727 */
1728 mm_flags = current->mm->flags;
1729
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 /* Write program headers for segments dump */
Roland McGrathf47aef52007-01-26 00:56:49 -08001731 for (vma = first_vma(current, gate_vma); vma != NULL;
1732 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 struct elf_phdr phdr;
1734 size_t sz;
1735
1736 sz = vma->vm_end - vma->vm_start;
1737
1738 phdr.p_type = PT_LOAD;
1739 phdr.p_offset = offset;
1740 phdr.p_vaddr = vma->vm_start;
1741 phdr.p_paddr = 0;
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001742 phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 phdr.p_memsz = sz;
1744 offset += phdr.p_filesz;
1745 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001746 if (vma->vm_flags & VM_WRITE)
1747 phdr.p_flags |= PF_W;
1748 if (vma->vm_flags & VM_EXEC)
1749 phdr.p_flags |= PF_X;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 phdr.p_align = ELF_EXEC_PAGESIZE;
1751
1752 DUMP_WRITE(&phdr, sizeof(phdr));
1753 }
1754
1755#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1756 ELF_CORE_WRITE_EXTRA_PHDRS;
1757#endif
1758
1759 /* write out the notes section */
1760 for (i = 0; i < numnote; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001761 if (!writenote(notes + i, file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 goto end_coredump;
1763
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001764#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1765 ELF_CORE_WRITE_EXTRA_NOTES;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001766 foffset += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001767#endif
1768
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769 /* write out the thread status notes section */
1770 list_for_each(t, &thread_list) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001771 struct elf_thread_status *tmp =
1772 list_entry(t, struct elf_thread_status, list);
1773
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 for (i = 0; i < tmp->num_notes; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001775 if (!writenote(&tmp->notes[i], file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 goto end_coredump;
1777 }
Andi Kleend025c9d2006-09-30 23:29:28 -07001778
1779 /* Align to page */
1780 DUMP_SEEK(dataoff - foffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781
Roland McGrathf47aef52007-01-26 00:56:49 -08001782 for (vma = first_vma(current, gate_vma); vma != NULL;
1783 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 unsigned long addr;
1785
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001786 if (!maydump(vma, mm_flags))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 continue;
1788
1789 for (addr = vma->vm_start;
1790 addr < vma->vm_end;
1791 addr += PAGE_SIZE) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001792 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 struct vm_area_struct *vma;
1794
1795 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1796 &page, &vma) <= 0) {
Andi Kleend025c9d2006-09-30 23:29:28 -07001797 DUMP_SEEK(PAGE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 } else {
1799 if (page == ZERO_PAGE(addr)) {
Brian Pomerantz03221702007-04-01 23:49:41 -07001800 if (!dump_seek(file, PAGE_SIZE)) {
1801 page_cache_release(page);
1802 goto end_coredump;
1803 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 } else {
1805 void *kaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001806 flush_cache_page(vma, addr,
1807 page_to_pfn(page));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 kaddr = kmap(page);
1809 if ((size += PAGE_SIZE) > limit ||
1810 !dump_write(file, kaddr,
1811 PAGE_SIZE)) {
1812 kunmap(page);
1813 page_cache_release(page);
1814 goto end_coredump;
1815 }
1816 kunmap(page);
1817 }
1818 page_cache_release(page);
1819 }
1820 }
1821 }
1822
1823#ifdef ELF_CORE_WRITE_EXTRA_DATA
1824 ELF_CORE_WRITE_EXTRA_DATA;
1825#endif
1826
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827end_coredump:
1828 set_fs(fs);
1829
1830cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001831 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 struct list_head *tmp = thread_list.next;
1833 list_del(tmp);
1834 kfree(list_entry(tmp, struct elf_thread_status, list));
1835 }
1836
1837 kfree(elf);
1838 kfree(prstatus);
1839 kfree(psinfo);
1840 kfree(notes);
1841 kfree(fpu);
1842#ifdef ELF_CORE_COPY_XFPREGS
1843 kfree(xfpu);
1844#endif
1845 return has_dumped;
1846#undef NUM_NOTES
1847}
1848
1849#endif /* USE_ELF_CORE_DUMP */
1850
1851static int __init init_elf_binfmt(void)
1852{
1853 return register_binfmt(&elf_format);
1854}
1855
1856static void __exit exit_elf_binfmt(void)
1857{
1858 /* Remove the COFF and ELF loaders. */
1859 unregister_binfmt(&elf_format);
1860}
1861
/*
 * Wire the ELF handler into kernel start-up and module teardown:
 * init_elf_binfmt is registered through core_initcall() and
 * exit_elf_binfmt through module_exit().
 *
 * NOTE(review): core_initcall() is used instead of module_init(),
 * presumably so the ELF loader is registered early in boot - confirm
 * against the initcall level definitions in <linux/init.h>.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
/* Declare the license of this code as GPL. */
MODULE_LICENSE("GPL");