/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
	.hasvdso	= 1
};

#define BAD_ADDR(x) IS_ERR_VALUE(x)

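/*
 * Map zero-filled anonymous memory to cover [start, end) and record the
 * new program break.  Used below to set up the bss/brk of the new image.
 */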
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

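/*
 * Build the argument block on the new process stack: the platform string,
 * the ELF auxiliary vector (AT_* entries), argc, and the argv/envp pointer
 * arrays that point at the strings already copied in by the exec core.
 */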
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry. */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

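/*
 * Default implementation: mmap one PT_LOAD segment of 'filep' near 'addr'.
 * When 'total_size' is non-zero the full image size is mapped first, so that
 * randomization cannot place later segments on top of the binary, and the
 * excess beyond this segment is unmapped again.
 */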
static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return(map_addr);
}

#endif /* !elf_map */

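/*
 * Size of the address range spanned by the PT_LOAD segments, from the page
 * containing the first segment to the end of the last one.  Returns 0 if
 * there are no PT_LOAD segments at all.
 */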
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
	    interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type,
					   total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

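/*
 * Load an a.out format program interpreter (OMAGIC/ZMAGIC/QMAGIC) into the
 * current mm and return its entry point, or ~0UL on failure.
 */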
static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user * addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
			   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

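/*
 * Apply a bounded, page-aligned random offset to the stack top when the
 * task has stack randomization enabled (PF_RANDOMIZE set and
 * ADDR_NO_RANDOMIZE not set in the personality).
 */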
static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
	    !(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

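/*
 * Core of the ELF loader: validate the headers, read the program headers,
 * locate and load the PT_INTERP interpreter (ELF or a.out), map the
 * PT_LOAD segments, set up bss/brk and the initial stack, and finally
 * transfer control to the new image with start_thread().
 */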
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		    interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk (elf_bss + load_bias,
					  elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT) {
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		} else {
			unsigned long uninitialized_var(interp_map_addr);

			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_map_addr,
						    load_bias);
			if (!BAD_ADDR(elf_entry)) {
				/*
				 * load_elf_interp() returns relocation
				 * adjustment
				 */
				interp_load_addr = elf_entry;
				elf_entry += loc->interp_elf_ex.e_entry;
			}
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

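/*
 * Advance the core file position by 'off' bytes: seek when the file
 * supports it, otherwise emit that many zero bytes.
 */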
static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n))
				return 0;
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		return 1;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

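/* On-disk size of an ELF note: header plus 4-byte padded name and data. */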
static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

#define DUMP_WRITE(addr, nr, foffset) \
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

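/* Write one note (header, name, data) to the core file, 4-byte aligned. */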
static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

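/* Fill in the ELF executable header for a core file with 'segs' program headers. */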
static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

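/* Describe the note segment: a PT_NOTE program header of 'sz' bytes at 'offset'. */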
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

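/*
 * Fill in the NT_PRPSINFO note: the command line (read back from user
 * space), ids, state and command name of the dumping task.
 */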
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

Roland McGrathf47aef52007-01-26 00:56:49 -08001507static struct vm_area_struct *first_vma(struct task_struct *tsk,
1508 struct vm_area_struct *gate_vma)
1509{
1510 struct vm_area_struct *ret = tsk->mm->mmap;
1511
1512 if (ret)
1513 return ret;
1514 return gate_vma;
1515}
1516/*
1517 * Helper function for iterating across a vma list. It ensures that the caller
1518 * will visit `gate_vma' prior to terminating the search.
1519 */
1520static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1521 struct vm_area_struct *gate_vma)
1522{
1523 struct vm_area_struct *ret;
1524
1525 ret = this_vma->vm_next;
1526 if (ret)
1527 return ret;
1528 if (this_vma == gate_vma)
1529 return NULL;
1530 return gate_vma;
1531}
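
/*
 * Typical iteration over the vma list, as used by elf_core_dump() below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		...
 */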
1532
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533/*
1534 * Actual dumper
1535 *
 1536 * This is a two-pass process; first we compute the file offsets of each
 1537 * piece of the dump, and then the data is actually written out. If we hit
 1538 * the core file size limit (RLIMIT_CORE) we just truncate.
1539 */
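/*
 * Layout of the resulting core file (offsets decided in pass one,
 * contents written in pass two):
 *
 *	ELF header
 *	program headers (one PT_NOTE entry plus one PT_LOAD per vma,
 *			 and any arch-specific extras)
 *	note data (NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, FPU notes and
 *		   per-thread status notes)
 *	padding up to an ELF_EXEC_PAGESIZE boundary
 *	memory contents of each dumpable vma, in list order
 */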
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001540static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541{
1542#define NUM_NOTES 6
1543 int has_dumped = 0;
1544 mm_segment_t fs;
1545 int segs;
1546 size_t size = 0;
1547 int i;
Roland McGrathf47aef52007-01-26 00:56:49 -08001548 struct vm_area_struct *vma, *gate_vma;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 struct elfhdr *elf = NULL;
Andi Kleend025c9d2006-09-30 23:29:28 -07001550 loff_t offset = 0, dataoff, foffset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1552 int numnote;
1553 struct memelfnote *notes = NULL;
1554 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1555 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1556 struct task_struct *g, *p;
1557 LIST_HEAD(thread_list);
1558 struct list_head *t;
1559 elf_fpregset_t *fpu = NULL;
1560#ifdef ELF_CORE_COPY_XFPREGS
1561 elf_fpxregset_t *xfpu = NULL;
1562#endif
1563 int thread_status_size = 0;
1564 elf_addr_t *auxv;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001565#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1566 int extra_notes_size;
1567#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
1569 /*
1570 * We no longer stop all VM operations.
1571 *
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001572 * This is because those processes that could possibly change map_count
1573 * or the mmap / vma pages are now blocked in do_exit on current
1574 * finishing this core dump.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 *
1576 * Only ptrace can touch these memory addresses, but it doesn't change
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001577 * the map_count or the pages allocated. So no possibility of crashing
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578 * exists while dumping the mm->vm_next areas to the core file.
1579 */
1580
1581 /* alloc memory for large data structures: too large to be on stack */
1582 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1583 if (!elf)
1584 goto cleanup;
1585 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1586 if (!prstatus)
1587 goto cleanup;
1588 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1589 if (!psinfo)
1590 goto cleanup;
1591 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1592 if (!notes)
1593 goto cleanup;
1594 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1595 if (!fpu)
1596 goto cleanup;
1597#ifdef ELF_CORE_COPY_XFPREGS
1598 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1599 if (!xfpu)
1600 goto cleanup;
1601#endif
1602
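	/*
	 * When dumping because of a signal, collect an elf_thread_status
	 * (NT_PRSTATUS plus FPU notes) for every other thread that shares
	 * current->mm, so the core file describes all threads.
	 */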
1603 if (signr) {
1604 struct elf_thread_status *tmp;
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001605 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 do_each_thread(g,p)
1607 if (current->mm == p->mm && current != p) {
Oliver Neukum11b0b5a2006-03-25 03:08:13 -08001608 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 if (!tmp) {
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001610 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 goto cleanup;
1612 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 tmp->thread = p;
1614 list_add(&tmp->list, &thread_list);
1615 }
1616 while_each_thread(g,p);
Oleg Nesterov486ccb02006-09-29 02:00:24 -07001617 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 list_for_each(t, &thread_list) {
1619 struct elf_thread_status *tmp;
1620 int sz;
1621
1622 tmp = list_entry(t, struct elf_thread_status, list);
1623 sz = elf_dump_thread_status(signr, tmp);
1624 thread_status_size += sz;
1625 }
1626 }
 1627 /* now collect the dump for the current task */
1628 memset(prstatus, 0, sizeof(*prstatus));
1629 fill_prstatus(prstatus, current, signr);
1630 elf_core_copy_regs(&prstatus->pr_reg, regs);
1631
1632 segs = current->mm->map_count;
1633#ifdef ELF_CORE_EXTRA_PHDRS
1634 segs += ELF_CORE_EXTRA_PHDRS;
1635#endif
1636
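	/*
	 * The gate vma (e.g. the vsyscall page on some architectures) is
	 * not on mm->mmap, so count it separately here; first_vma() and
	 * next_vma() make sure the dump loops below visit it as well.
	 */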
Roland McGrathf47aef52007-01-26 00:56:49 -08001637 gate_vma = get_gate_vma(current);
1638 if (gate_vma != NULL)
1639 segs++;
1640
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 /* Set up header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001642 fill_elf_header(elf, segs + 1); /* including notes section */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
1644 has_dumped = 1;
1645 current->flags |= PF_DUMPCORE;
1646
1647 /*
1648 * Set up the notes in similar form to SVR4 core dumps made
1649 * with info from their /proc.
1650 */
1651
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001652 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 fill_psinfo(psinfo, current->group_leader, current->mm);
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001654 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655
Eric W. Biedermana9289722005-10-30 15:02:08 -08001656 numnote = 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001658 auxv = (elf_addr_t *)current->mm->saved_auxv;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659
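	/*
	 * saved_auxv is an array of (type, value) pairs terminated by
	 * AT_NULL; count the elf_addr_t words up to and including the
	 * terminating pair so the whole vector goes into the NT_AUXV note.
	 */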
1660 i = 0;
1661 do
1662 i += 2;
1663 while (auxv[i - 2] != AT_NULL);
1664 fill_note(&notes[numnote++], "CORE", NT_AUXV,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001665 i * sizeof(elf_addr_t), auxv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666
1667 /* Try to dump the FPU. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001668 if ((prstatus->pr_fpvalid =
1669 elf_core_copy_task_fpregs(current, regs, fpu)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 fill_note(notes + numnote++,
1671 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1672#ifdef ELF_CORE_COPY_XFPREGS
1673 if (elf_core_copy_task_xfpregs(current, xfpu))
1674 fill_note(notes + numnote++,
1675 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1676#endif
1677
1678 fs = get_fs();
1679 set_fs(KERNEL_DS);
1680
1681 DUMP_WRITE(elf, sizeof(*elf));
1682 offset += sizeof(*elf); /* Elf header */
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001683 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1684 foffset = offset;
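	/*
	 * foffset tracks the real file position: the note data is written
	 * straight after the ELF header and the (segs + 1) program headers.
	 */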
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685
1686 /* Write notes phdr entry */
1687 {
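		/*
		 * A single PT_NOTE segment covers every note: the
		 * process-wide notes counted in numnote, the per-thread
		 * status notes, and any arch-specific extra notes.
		 */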
1688 struct elf_phdr phdr;
1689 int sz = 0;
1690
1691 for (i = 0; i < numnote; i++)
1692 sz += notesize(notes + i);
1693
1694 sz += thread_status_size;
1695
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001696#ifdef ELF_CORE_WRITE_EXTRA_NOTES
Michael Ellermanef7320e2007-07-06 02:39:49 -07001697 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1698 sz += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001699#endif
1700
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 fill_elf_note_phdr(&phdr, sz, offset);
1702 offset += sz;
1703 DUMP_WRITE(&phdr, sizeof(phdr));
1704 }
1705
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
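	/*
	 * Segment contents start on the next ELF_EXEC_PAGESIZE boundary
	 * after the notes; the DUMP_SEEK(dataoff - foffset) further down
	 * pads the file out to this point before any vma data is written.
	 */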
1707
1708 /* Write program headers for segments dump */
Roland McGrathf47aef52007-01-26 00:56:49 -08001709 for (vma = first_vma(current, gate_vma); vma != NULL;
1710 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 struct elf_phdr phdr;
1712 size_t sz;
1713
1714 sz = vma->vm_end - vma->vm_start;
1715
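		/*
		 * Every vma gets a program header, but vmas we choose not
		 * to dump get p_filesz == 0: p_memsz still records the
		 * size, and the file offsets stay consistent with what is
		 * actually written later.
		 */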
1716 phdr.p_type = PT_LOAD;
1717 phdr.p_offset = offset;
1718 phdr.p_vaddr = vma->vm_start;
1719 phdr.p_paddr = 0;
1720 phdr.p_filesz = maydump(vma) ? sz : 0;
1721 phdr.p_memsz = sz;
1722 offset += phdr.p_filesz;
1723 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001724 if (vma->vm_flags & VM_WRITE)
1725 phdr.p_flags |= PF_W;
1726 if (vma->vm_flags & VM_EXEC)
1727 phdr.p_flags |= PF_X;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 phdr.p_align = ELF_EXEC_PAGESIZE;
1729
1730 DUMP_WRITE(&phdr, sizeof(phdr));
1731 }
1732
1733#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1734 ELF_CORE_WRITE_EXTRA_PHDRS;
1735#endif
1736
1737 /* write out the notes section */
1738 for (i = 0; i < numnote; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001739 if (!writenote(notes + i, file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 goto end_coredump;
1741
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001742#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1743 ELF_CORE_WRITE_EXTRA_NOTES;
Michael Ellermanef7320e2007-07-06 02:39:49 -07001744 foffset += extra_notes_size;
Dwayne Grant McConnellbf1ab972006-11-23 00:46:37 +01001745#endif
1746
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 /* write out the thread status notes section */
1748 list_for_each(t, &thread_list) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001749 struct elf_thread_status *tmp =
1750 list_entry(t, struct elf_thread_status, list);
1751
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 for (i = 0; i < tmp->num_notes; i++)
Andi Kleend025c9d2006-09-30 23:29:28 -07001753 if (!writenote(&tmp->notes[i], file, &foffset))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 goto end_coredump;
1755 }
Andi Kleend025c9d2006-09-30 23:29:28 -07001756
1757 /* Align to page */
1758 DUMP_SEEK(dataoff - foffset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759
Roland McGrathf47aef52007-01-26 00:56:49 -08001760 for (vma = first_vma(current, gate_vma); vma != NULL;
1761 vma = next_vma(vma, gate_vma)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 unsigned long addr;
1763
1764 if (!maydump(vma))
1765 continue;
1766
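		/*
		 * Dump the vma one page at a time: fault each page in with
		 * get_user_pages(), skip pages backed by the zero page via
		 * dump_seek(), and stop once RLIMIT_CORE would be exceeded.
		 */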
1767 for (addr = vma->vm_start;
1768 addr < vma->vm_end;
1769 addr += PAGE_SIZE) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001770 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 struct vm_area_struct *vma;
1772
1773 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1774 &page, &vma) <= 0) {
Andi Kleend025c9d2006-09-30 23:29:28 -07001775 DUMP_SEEK(PAGE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 } else {
1777 if (page == ZERO_PAGE(addr)) {
Brian Pomerantz03221702007-04-01 23:49:41 -07001778 if (!dump_seek(file, PAGE_SIZE)) {
1779 page_cache_release(page);
1780 goto end_coredump;
1781 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 } else {
1783 void *kaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001784 flush_cache_page(vma, addr,
1785 page_to_pfn(page));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786 kaddr = kmap(page);
1787 if ((size += PAGE_SIZE) > limit ||
1788 !dump_write(file, kaddr,
1789 PAGE_SIZE)) {
1790 kunmap(page);
1791 page_cache_release(page);
1792 goto end_coredump;
1793 }
1794 kunmap(page);
1795 }
1796 page_cache_release(page);
1797 }
1798 }
1799 }
1800
1801#ifdef ELF_CORE_WRITE_EXTRA_DATA
1802 ELF_CORE_WRITE_EXTRA_DATA;
1803#endif
1804
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805end_coredump:
1806 set_fs(fs);
1807
1808cleanup:
Jesper Juhl74da6cd2006-01-11 01:51:26 +01001809 while (!list_empty(&thread_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 struct list_head *tmp = thread_list.next;
1811 list_del(tmp);
1812 kfree(list_entry(tmp, struct elf_thread_status, list));
1813 }
1814
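	/* kfree() ignores NULL pointers, so partially failed allocations are fine */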
1815 kfree(elf);
1816 kfree(prstatus);
1817 kfree(psinfo);
1818 kfree(notes);
1819 kfree(fpu);
1820#ifdef ELF_CORE_COPY_XFPREGS
1821 kfree(xfpu);
1822#endif
1823 return has_dumped;
1824#undef NUM_NOTES
1825}
1826
1827#endif /* USE_ELF_CORE_DUMP */
1828
1829static int __init init_elf_binfmt(void)
1830{
1831 return register_binfmt(&elf_format);
1832}
1833
1834static void __exit exit_elf_binfmt(void)
1835{
 1836 /* Remove the ELF loader. */
1837 unregister_binfmt(&elf_format);
1838}
1839
1840core_initcall(init_elf_binfmt);
1841module_exit(exit_elf_binfmt);
1842MODULE_LICENSE("GPL");