blob: 10462efe5a135dcf40ad84d4e1646f37964ffc19 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/errno.h>
20#include <linux/signal.h>
21#include <linux/binfmts.h>
22#include <linux/string.h>
23#include <linux/file.h>
24#include <linux/fcntl.h>
25#include <linux/ptrace.h>
26#include <linux/slab.h>
27#include <linux/shm.h>
28#include <linux/personality.h>
29#include <linux/elfcore.h>
30#include <linux/init.h>
31#include <linux/highuid.h>
32#include <linux/smp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/compiler.h>
34#include <linux/highmem.h>
35#include <linux/pagemap.h>
36#include <linux/security.h>
37#include <linux/syscalls.h>
38#include <linux/random.h>
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070039#include <linux/elf.h>
Alexey Dobriyan7e80d0d2007-05-08 00:28:59 -070040#include <linux/utsname.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <asm/uaccess.h>
42#include <asm/param.h>
43#include <asm/page.h>
44
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070045static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46static int load_elf_library(struct file *);
Andrew Mortonbb1ad822008-01-30 13:31:07 +010047static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 int, int, unsigned long);
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
Linus Torvalds1da177e2005-04-16 15:20:36 -070050/*
51 * If we don't support core dumping, then supply a NULL so we
52 * don't even try.
53 */
Matt Mackall708e9a72006-01-08 01:05:25 -080054#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Neil Horman7dc0b222007-10-16 23:26:34 -070055static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#else
57#define elf_core_dump NULL
58#endif
59
60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070061#define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#else
Jesper Juhlf4e5cc22006-06-23 02:05:35 -070063#define ELF_MIN_ALIGN PAGE_SIZE
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#endif
65
66#ifndef ELF_CORE_EFLAGS
67#define ELF_CORE_EFLAGS 0
68#endif
69
70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
73
/*
 * Registration record for the ELF binary format handler.  Passed to
 * register_binfmt() at init time; .core_dump resolves to NULL when core
 * dump support is compiled out (see the #if above).
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,	/* exec() entry point for ELF images */
	.load_shlib	= load_elf_library,	/* legacy uselib() support */
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,	/* refuse dumps truncated below one ELF page */
	.hasvdso	= 1
};
82
Andrew Mortond4e3cc32007-07-21 04:37:32 -070083#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
/*
 * Extend the brk area so that [start, end) is backed by anonymous,
 * zero-filled pages, then record the new program break.
 *
 * Both bounds are rounded up to ELF page granularity before mapping.
 * Returns 0 on success; on failure returns the bad address produced by
 * do_brk() (a negative errno cast to unsigned long — callers test it
 * with BAD_ADDR()/IS_ERR-style checks).
 */
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		/* do_brk() requires the mmap semaphore held for writing */
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	/*
	 * Record the break even when end <= start (nothing to map):
	 * the caller still relies on start_brk/brk being set.
	 */
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101/* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700104 be in memory
105 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static int padzero(unsigned long elf_bss)
107{
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117}
118
Ohad Ben-Cohen09c6dd32008-02-03 18:05:15 +0200119/* Let's use some macros to make this stack manipulation a little clearer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120#ifdef CONFIG_STACK_GROWSUP
121#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122#define STACK_ROUND(sp, items) \
123 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700124#define STACK_ALLOC(sp, len) ({ \
125 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 old_sp; })
Linus Torvalds1da177e2005-04-16 15:20:36 -0700127#else
128#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129#define STACK_ROUND(sp, items) \
130 (((unsigned long) (sp - items)) &~ 15UL)
131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132#endif
133
Nathan Lynch483fad12008-07-22 04:48:46 +1000134#ifndef ELF_BASE_PLATFORM
135/*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140#define ELF_BASE_PLATFORM NULL
141#endif
142
/*
 * Lay out the initial user stack for a freshly exec'd ELF image:
 * platform strings, the AT_RANDOM seed, the auxiliary vector, argc,
 * the argv[] and envp[] pointer arrays (pointing at the strings that
 * setup_arg_pages() already copied), and the auxv itself.
 *
 * On entry bprm->p is the current top-of-strings stack pointer.
 * Returns 0 on success, -EFAULT on a failed user-space store, or
 * -EINVAL if an arg/env string is unterminated or oversized.
 */
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, cred->uid);
	NEW_AUX_ENT(AT_EUID, cred->euid);
	NEW_AUX_ENT(AT_GID, cred->gid);
	NEW_AUX_ENT(AT_EGID, cred->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry. */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	/* argc slot + argv pointers + NULL + envp pointers + NULL */
	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		/* strings were placed back-to-back by copy_strings(); walk them */
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
324
325#ifndef elf_map
326
/*
 * mmap a single PT_LOAD segment of an ELF file at (approximately)
 * addr, page-aligning the request and compensating the file offset for
 * the segment's in-page start.
 *
 * @total_size: full span of the ELF image; non-zero only for the first
 *              segment mapped, so the whole image is reserved at once
 *              (see comment below).  Returns the mapped address, or a
 *              BAD_ADDR() error value from do_mmap().
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return(map_addr);
}
362
363#endif /* !elf_map */
364
Jiri Kosinacc503c12008-01-30 13:31:07 +0100365static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
366{
367 int i, first_idx = -1, last_idx = -1;
368
369 for (i = 0; i < nr; i++) {
370 if (cmds[i].p_type == PT_LOAD) {
371 last_idx = i;
372 if (first_idx == -1)
373 first_idx = i;
374 }
375 }
376 if (first_idx == -1)
377 return 0;
378
379 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
380 ELF_PAGESTART(cmds[first_idx].p_vaddr);
381}
382
383
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384/* This is much more generalized than the library routine read function,
385 so we keep this separate. Technically the library read function
386 is only provided so that we can read a.out libraries that have
387 an ELF header */
388
/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

/*
 * Map the ELF interpreter (dynamic linker) into the current address
 * space and set up its bss.
 *
 * @interp_map_addr: out-parameter; receives the address of the first
 *                   segment actually mapped (must point at 0 on entry).
 * @no_base:         when non-zero and the interpreter is ET_DYN, bias
 *                   the first mapping so the kernel chooses the base.
 *
 * Returns the interpreter's load bias (load_addr) on success, or an
 * error value recognized by BAD_ADDR() on failure.
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	/* bound e_phnum so the kmalloc below stays sane */
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			/* translate the segment's PF_* flags to mmap PROT_* */
			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			/*
			 * Once the base is chosen (or for ET_EXEC), later
			 * segments must land exactly where the phdrs say.
			 */
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			/* only the first mapping reserves the full image */
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsize so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}
541
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542/*
543 * These are the functions used to load ELF style executables and shared
544 * libraries. There is no binary dependent code anywhere else.
545 */
546
547#define INTERPRETER_NONE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548#define INTERPRETER_ELF 2
549
Andi Kleen913bd902006-03-25 16:29:09 +0100550#ifndef STACK_RND_MASK
James Bottomleyd1cabd62007-03-16 13:38:35 -0800551#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
Andi Kleen913bd902006-03-25 16:29:09 +0100552#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
554static unsigned long randomize_stack_top(unsigned long stack_top)
555{
556 unsigned int random_variable = 0;
557
Andi Kleenc16b63e2006-09-26 10:52:28 +0200558 if ((current->flags & PF_RANDOMIZE) &&
559 !(current->personality & ADDR_NO_RANDOMIZE)) {
Andi Kleen913bd902006-03-25 16:29:09 +0100560 random_variable = get_random_int() & STACK_RND_MASK;
561 random_variable <<= PAGE_SHIFT;
562 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563#ifdef CONFIG_STACK_GROWSUP
Andi Kleen913bd902006-03-25 16:29:09 +0100564 return PAGE_ALIGN(stack_top) + random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565#else
Andi Kleen913bd902006-03-25 16:29:09 +0100566 return PAGE_ALIGN(stack_top) - random_variable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567#endif
568}
569
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700570static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571{
572 struct file *interpreter = NULL; /* to shut gcc up */
573 unsigned long load_addr = 0, load_bias = 0;
574 int load_addr_set = 0;
575 char * elf_interpreter = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 unsigned long error;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700577 struct elf_phdr *elf_ppnt, *elf_phdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 unsigned long elf_bss, elf_brk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 int retval, i;
580 unsigned int size;
Jiri Kosinacc503c12008-01-30 13:31:07 +0100581 unsigned long elf_entry;
582 unsigned long interp_load_addr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 unsigned long start_code, end_code, start_data, end_data;
584 unsigned long reloc_func_desc = 0;
David Rientjes8de61e62006-12-06 20:40:16 -0800585 int executable_stack = EXSTACK_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 unsigned long def_flags = 0;
587 struct {
588 struct elfhdr elf_ex;
589 struct elfhdr interp_elf_ex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 } *loc;
591
592 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
593 if (!loc) {
594 retval = -ENOMEM;
595 goto out_ret;
596 }
597
598 /* Get the exec-header */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700599 loc->elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
601 retval = -ENOEXEC;
602 /* First of all, some simple consistency checks */
603 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
604 goto out;
605
606 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
607 goto out;
608 if (!elf_check_arch(&loc->elf_ex))
609 goto out;
610 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
611 goto out;
612
613 /* Now read in all of the header information */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
615 goto out;
616 if (loc->elf_ex.e_phnum < 1 ||
617 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
618 goto out;
619 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
620 retval = -ENOMEM;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700621 elf_phdata = kmalloc(size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 if (!elf_phdata)
623 goto out;
624
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700625 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
626 (char *)elf_phdata, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 if (retval != size) {
628 if (retval >= 0)
629 retval = -EIO;
630 goto out_free_ph;
631 }
632
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 elf_ppnt = elf_phdata;
634 elf_bss = 0;
635 elf_brk = 0;
636
637 start_code = ~0UL;
638 end_code = 0;
639 start_data = 0;
640 end_data = 0;
641
642 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
643 if (elf_ppnt->p_type == PT_INTERP) {
644 /* This is the program interpreter used for
645 * shared libraries - for now assume that this
646 * is an a.out format binary
647 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 retval = -ENOEXEC;
649 if (elf_ppnt->p_filesz > PATH_MAX ||
650 elf_ppnt->p_filesz < 2)
Al Viroe7b9b552009-03-29 16:31:16 -0400651 goto out_free_ph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653 retval = -ENOMEM;
Jesper Juhl792db3a2006-01-09 20:54:45 -0800654 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700655 GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 if (!elf_interpreter)
Al Viroe7b9b552009-03-29 16:31:16 -0400657 goto out_free_ph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700660 elf_interpreter,
661 elf_ppnt->p_filesz);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 if (retval != elf_ppnt->p_filesz) {
663 if (retval >= 0)
664 retval = -EIO;
665 goto out_free_interp;
666 }
667 /* make sure path is NULL terminated */
668 retval = -ENOEXEC;
669 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
670 goto out_free_interp;
671
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 /*
673 * The early SET_PERSONALITY here is so that the lookup
674 * for the interpreter happens in the namespace of the
675 * to-be-execed image. SET_PERSONALITY can select an
676 * alternate root.
677 *
678 * However, SET_PERSONALITY is NOT allowed to switch
679 * this task into the new images's memory mapping
680 * policy - that is, TASK_SIZE must still evaluate to
681 * that which is appropriate to the execing application.
682 * This is because exit_mmap() needs to have TASK_SIZE
683 * evaluate to the size of the old image.
684 *
685 * So if (say) a 64-bit application is execing a 32-bit
686 * application it is the architecture's responsibility
687 * to defer changing the value of TASK_SIZE until the
688 * switch really is going to happen - do this in
689 * flush_thread(). - akpm
690 */
Martin Schwidefsky0b592682008-10-16 15:39:57 +0200691 SET_PERSONALITY(loc->elf_ex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
693 interpreter = open_exec(elf_interpreter);
694 retval = PTR_ERR(interpreter);
695 if (IS_ERR(interpreter))
696 goto out_free_interp;
Alexey Dobriyan1fb84492007-01-26 00:57:16 -0800697
698 /*
699 * If the binary is not readable then enforce
700 * mm->dumpable = 0 regardless of the interpreter's
701 * permissions.
702 */
703 if (file_permission(interpreter, MAY_READ) < 0)
704 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
705
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700706 retval = kernel_read(interpreter, 0, bprm->buf,
707 BINPRM_BUF_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 if (retval != BINPRM_BUF_SIZE) {
709 if (retval >= 0)
710 retval = -EIO;
711 goto out_free_dentry;
712 }
713
714 /* Get the exec headers */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700715 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 break;
717 }
718 elf_ppnt++;
719 }
720
721 elf_ppnt = elf_phdata;
722 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
723 if (elf_ppnt->p_type == PT_GNU_STACK) {
724 if (elf_ppnt->p_flags & PF_X)
725 executable_stack = EXSTACK_ENABLE_X;
726 else
727 executable_stack = EXSTACK_DISABLE_X;
728 break;
729 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
731 /* Some simple consistency checks for the interpreter */
732 if (elf_interpreter) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 retval = -ELIBBAD;
Andi Kleend20894a2008-02-08 04:21:54 -0800734 /* Not an ELF interpreter */
735 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 goto out_free_dentry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 /* Verify the interpreter has a valid arch */
Andi Kleend20894a2008-02-08 04:21:54 -0800738 if (!elf_check_arch(&loc->interp_elf_ex))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 goto out_free_dentry;
740 } else {
741 /* Executables without an interpreter also need a personality */
Martin Schwidefsky0b592682008-10-16 15:39:57 +0200742 SET_PERSONALITY(loc->elf_ex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 }
744
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745 /* Flush all traces of the currently running executable */
746 retval = flush_old_exec(bprm);
747 if (retval)
748 goto out_free_dentry;
749
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 /* OK, This is the point of no return */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 current->flags &= ~PF_FORKNOEXEC;
752 current->mm->def_flags = def_flags;
753
754 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
755 may depend on the personality. */
Martin Schwidefsky0b592682008-10-16 15:39:57 +0200756 SET_PERSONALITY(loc->elf_ex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
758 current->personality |= READ_IMPLIES_EXEC;
759
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700760 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 current->flags |= PF_RANDOMIZE;
762 arch_pick_mmap_layout(current->mm);
763
764 /* Do this so that we can load the interpreter, if need be. We will
765 change some of these later */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 current->mm->free_area_cache = current->mm->mmap_base;
Wolfgang Wander1363c3c2005-06-21 17:14:49 -0700767 current->mm->cached_hole_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
769 executable_stack);
770 if (retval < 0) {
771 send_sig(SIGKILL, current, 0);
772 goto out_free_dentry;
773 }
774
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 current->mm->start_stack = bprm->p;
776
777 /* Now we do a little grungy work by mmaping the ELF image into
Jiri Kosinacc503c12008-01-30 13:31:07 +0100778 the correct location in memory. */
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700779 for(i = 0, elf_ppnt = elf_phdata;
780 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 int elf_prot = 0, elf_flags;
782 unsigned long k, vaddr;
783
784 if (elf_ppnt->p_type != PT_LOAD)
785 continue;
786
787 if (unlikely (elf_brk > elf_bss)) {
788 unsigned long nbyte;
789
790 /* There was a PT_LOAD segment with p_memsz > p_filesz
791 before this one. Map anonymous pages, if needed,
792 and clear the area. */
793 retval = set_brk (elf_bss + load_bias,
794 elf_brk + load_bias);
795 if (retval) {
796 send_sig(SIGKILL, current, 0);
797 goto out_free_dentry;
798 }
799 nbyte = ELF_PAGEOFFSET(elf_bss);
800 if (nbyte) {
801 nbyte = ELF_MIN_ALIGN - nbyte;
802 if (nbyte > elf_brk - elf_bss)
803 nbyte = elf_brk - elf_bss;
804 if (clear_user((void __user *)elf_bss +
805 load_bias, nbyte)) {
806 /*
807 * This bss-zeroing can fail if the ELF
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700808 * file specifies odd protections. So
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 * we don't check the return value
810 */
811 }
812 }
813 }
814
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700815 if (elf_ppnt->p_flags & PF_R)
816 elf_prot |= PROT_READ;
817 if (elf_ppnt->p_flags & PF_W)
818 elf_prot |= PROT_WRITE;
819 if (elf_ppnt->p_flags & PF_X)
820 elf_prot |= PROT_EXEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700822 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823
824 vaddr = elf_ppnt->p_vaddr;
825 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
826 elf_flags |= MAP_FIXED;
827 } else if (loc->elf_ex.e_type == ET_DYN) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700828 /* Try and get dynamic programs out of the way of the
829 * default mmap base, as well as whatever program they
830 * might try to exec. This is because the brk will
831 * follow the loader, and is not movable. */
Jiri Kosinacc503c12008-01-30 13:31:07 +0100832#ifdef CONFIG_X86
833 load_bias = 0;
834#else
Linus Torvalds90cb28e2007-01-06 13:28:21 -0800835 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
Jiri Kosinacc503c12008-01-30 13:31:07 +0100836#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 }
838
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700839 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
Andrew Mortonbb1ad822008-01-30 13:31:07 +0100840 elf_prot, elf_flags, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 if (BAD_ADDR(error)) {
842 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700843 retval = IS_ERR((void *)error) ?
844 PTR_ERR((void*)error) : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 goto out_free_dentry;
846 }
847
848 if (!load_addr_set) {
849 load_addr_set = 1;
850 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
851 if (loc->elf_ex.e_type == ET_DYN) {
852 load_bias += error -
853 ELF_PAGESTART(load_bias + vaddr);
854 load_addr += load_bias;
855 reloc_func_desc = load_bias;
856 }
857 }
858 k = elf_ppnt->p_vaddr;
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700859 if (k < start_code)
860 start_code = k;
861 if (start_data < k)
862 start_data = k;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863
864 /*
865 * Check to see if the section's size will overflow the
866 * allowed task size. Note that p_filesz must always be
867 * <= p_memsz so it is only necessary to check p_memsz.
868 */
Chuck Ebbertce510592006-07-03 00:24:14 -0700869 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 elf_ppnt->p_memsz > TASK_SIZE ||
871 TASK_SIZE - elf_ppnt->p_memsz < k) {
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700872 /* set_brk can never work. Avoid overflows. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 send_sig(SIGKILL, current, 0);
Alexey Kuznetsovb140f252007-05-08 00:31:57 -0700874 retval = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 goto out_free_dentry;
876 }
877
878 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
879
880 if (k > elf_bss)
881 elf_bss = k;
882 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
883 end_code = k;
884 if (end_data < k)
885 end_data = k;
886 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
887 if (k > elf_brk)
888 elf_brk = k;
889 }
890
891 loc->elf_ex.e_entry += load_bias;
892 elf_bss += load_bias;
893 elf_brk += load_bias;
894 start_code += load_bias;
895 end_code += load_bias;
896 start_data += load_bias;
897 end_data += load_bias;
898
899 /* Calling set_brk effectively mmaps the pages that we need
900 * for the bss and break sections. We must do this before
901 * mapping in the interpreter, to make sure it doesn't wind
902 * up getting placed where the bss needs to go.
903 */
904 retval = set_brk(elf_bss, elf_brk);
905 if (retval) {
906 send_sig(SIGKILL, current, 0);
907 goto out_free_dentry;
908 }
akpm@osdl.org6de50512005-10-11 08:29:08 -0700909 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 send_sig(SIGSEGV, current, 0);
911 retval = -EFAULT; /* Nobody gets to see this, but.. */
912 goto out_free_dentry;
913 }
914
915 if (elf_interpreter) {
Andi Kleend20894a2008-02-08 04:21:54 -0800916 unsigned long uninitialized_var(interp_map_addr);
Jiri Kosinacc503c12008-01-30 13:31:07 +0100917
Andi Kleend20894a2008-02-08 04:21:54 -0800918 elf_entry = load_elf_interp(&loc->interp_elf_ex,
919 interpreter,
920 &interp_map_addr,
921 load_bias);
922 if (!IS_ERR((void *)elf_entry)) {
923 /*
924 * load_elf_interp() returns relocation
925 * adjustment
926 */
927 interp_load_addr = elf_entry;
928 elf_entry += loc->interp_elf_ex.e_entry;
Jiri Kosinacc503c12008-01-30 13:31:07 +0100929 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 if (BAD_ADDR(elf_entry)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 force_sig(SIGSEGV, current);
Chuck Ebbertce510592006-07-03 00:24:14 -0700932 retval = IS_ERR((void *)elf_entry) ?
933 (int)elf_entry : -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 goto out_free_dentry;
935 }
936 reloc_func_desc = interp_load_addr;
937
938 allow_write_access(interpreter);
939 fput(interpreter);
940 kfree(elf_interpreter);
941 } else {
942 elf_entry = loc->elf_ex.e_entry;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100943 if (BAD_ADDR(elf_entry)) {
Chuck Ebbertce510592006-07-03 00:24:14 -0700944 force_sig(SIGSEGV, current);
945 retval = -EINVAL;
Suresh Siddha5342fba2006-02-26 04:18:28 +0100946 goto out_free_dentry;
947 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 }
949
950 kfree(elf_phdata);
951
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 set_binfmt(&elf_format);
953
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700954#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
Martin Schwidefskyfc5243d2008-12-25 13:38:35 +0100955 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700956 if (retval < 0) {
957 send_sig(SIGKILL, current, 0);
Roland McGrath18c8baf2005-04-28 15:17:19 -0700958 goto out;
Benjamin Herrenschmidt547ee842005-04-16 15:24:35 -0700959 }
960#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
961
David Howellsa6f76f22008-11-14 10:39:24 +1100962 install_exec_creds(bprm);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 current->flags &= ~PF_FORKNOEXEC;
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700964 retval = create_elf_tables(bprm, &loc->elf_ex,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700965 load_addr, interp_load_addr);
Ollie Wildb6a2fea2007-07-19 01:48:16 -0700966 if (retval < 0) {
967 send_sig(SIGKILL, current, 0);
968 goto out;
969 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 /* N.B. passed_fileno might not be initialized? */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 current->mm->end_code = end_code;
972 current->mm->start_code = start_code;
973 current->mm->start_data = start_data;
974 current->mm->end_data = end_data;
975 current->mm->start_stack = bprm->p;
976
Jiri Kosinac1d171a2008-01-30 13:30:40 +0100977#ifdef arch_randomize_brk
Ingo Molnar32a93232008-02-06 22:39:44 +0100978 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
Jiri Kosinac1d171a2008-01-30 13:30:40 +0100979 current->mm->brk = current->mm->start_brk =
980 arch_randomize_brk(current->mm);
981#endif
982
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if (current->personality & MMAP_PAGE_ZERO) {
984 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
985 and some applications "depend" upon this behavior.
986 Since we do not have the power to recompile these, we
Jesper Juhlf4e5cc22006-06-23 02:05:35 -0700987 emulate the SVr4 behavior. Sigh. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 down_write(&current->mm->mmap_sem);
989 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
990 MAP_FIXED | MAP_PRIVATE, 0);
991 up_write(&current->mm->mmap_sem);
992 }
993
994#ifdef ELF_PLAT_INIT
995 /*
996 * The ABI may specify that certain registers be set up in special
997 * ways (on i386 %edx is the address of a DT_FINI function, for
998 * example. In addition, it may also specify (eg, PowerPC64 ELF)
999 * that the e_entry field is the address of the function descriptor
1000 * for the startup routine, rather than the address of the startup
1001 * routine itself. This macro performs whatever initialization to
1002 * the regs structure is required as well as any relocations to the
1003 * function descriptor entries when executing dynamically links apps.
1004 */
1005 ELF_PLAT_INIT(regs, reloc_func_desc);
1006#endif
1007
1008 start_thread(regs, elf_entry, bprm->p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 retval = 0;
1010out:
1011 kfree(loc);
1012out_ret:
1013 return retval;
1014
1015 /* error cleanup */
1016out_free_dentry:
1017 allow_write_access(interpreter);
1018 if (interpreter)
1019 fput(interpreter);
1020out_free_interp:
Jesper Juhlf99d49a2005-11-07 01:01:34 -08001021 kfree(elf_interpreter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022out_free_ph:
1023 kfree(elf_phdata);
1024 goto out;
1025}
1026
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header.
   NOTE(review): presumably reached via the uselib(2) syscall path
   (elf_format.load_shlib) -- confirm against the registration site.
   The library must be ET_EXEC with at most two program headers, of
   which exactly one is PT_LOAD. Returns 0 on success, -ENOEXEC /
   -ENOMEM / -EFAULT on failure. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	/* Read and validate the ELF file header. */
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	/* Exactly one PT_LOAD segment is allowed; count them all. */
	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	/* Advance eppnt to that single PT_LOAD entry. */
	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	/* do_mmap returns the mapped address on success; MAP_FIXED means
	   anything else is a failure code. */
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	/* Zero the tail of the last file-backed page (start of bss). */
	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	/* If p_memsz extends past the file-backed pages, map anonymous
	   pages for the remainder of the bss via do_brk. */
	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
1110
1111/*
1112 * Note that some platforms still use traditional core dumps and not
1113 * the ELF core dump. Each platform can select it as appropriate.
1114 */
Matt Mackall708e9a72006-01-08 01:05:25 -08001115#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116
1117/*
1118 * ELF core dumper
1119 *
1120 * Modelled on fs/exec.c:aout_core_dump()
1121 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1122 */
1123/*
1124 * These are the only things you should do on a core-file: use only these
1125 * functions to write out all the necessary info.
1126 */
1127static int dump_write(struct file *file, const void *addr, int nr)
1128{
1129 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1130}
1131
Daniel Jacobowitz5db92852005-06-15 22:26:34 -07001132static int dump_seek(struct file *file, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133{
Andi Kleend025c9d2006-09-30 23:29:28 -07001134 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
Petr Vandrovec7f14daa2006-10-13 04:13:16 +02001135 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 return 0;
Andi Kleend025c9d2006-09-30 23:29:28 -07001137 } else {
1138 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1139 if (!buf)
1140 return 0;
1141 while (off > 0) {
1142 unsigned long n = off;
1143 if (n > PAGE_SIZE)
1144 n = PAGE_SIZE;
1145 if (!dump_write(file, buf, n))
1146 return 0;
1147 off -= n;
1148 }
1149 free_page((unsigned long)buf);
1150 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 return 1;
1152}
1153
1154/*
Roland McGrath82df3972007-10-16 23:27:02 -07001155 * Decide what to dump of a segment, part, all or none.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 */
Roland McGrath82df3972007-10-16 23:27:02 -07001157static unsigned long vma_dump_size(struct vm_area_struct *vma,
1158 unsigned long mm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159{
KOSAKI Motohiroe575f112008-10-18 20:27:08 -07001160#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1161
Roland McGrathe5b97dd2007-01-26 00:56:48 -08001162 /* The vma can be set up to tell us the answer directly. */
1163 if (vma->vm_flags & VM_ALWAYSDUMP)
Roland McGrath82df3972007-10-16 23:27:02 -07001164 goto whole;
Roland McGrathe5b97dd2007-01-26 00:56:48 -08001165
KOSAKI Motohiroe575f112008-10-18 20:27:08 -07001166 /* Hugetlb memory check */
1167 if (vma->vm_flags & VM_HUGETLB) {
1168 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1169 goto whole;
1170 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1171 goto whole;
1172 }
1173
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 /* Do not dump I/O mapped devices or special mappings */
1175 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1176 return 0;
1177
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001178 /* By default, dump shared memory if mapped from an anonymous file. */
1179 if (vma->vm_flags & VM_SHARED) {
Roland McGrath82df3972007-10-16 23:27:02 -07001180 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1181 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1182 goto whole;
1183 return 0;
Kawai, Hidehiroa1b59e82007-07-19 01:48:29 -07001184 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185
Roland McGrath82df3972007-10-16 23:27:02 -07001186 /* Dump segments that have been written to. */
1187 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1188 goto whole;
1189 if (vma->vm_file == NULL)
1190 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191
Roland McGrath82df3972007-10-16 23:27:02 -07001192 if (FILTER(MAPPED_PRIVATE))
1193 goto whole;
1194
1195 /*
1196 * If this looks like the beginning of a DSO or executable mapping,
1197 * check for an ELF header. If we find one, dump the first page to
1198 * aid in determining what was mapped here.
1199 */
Roland McGrath92dc07b2009-02-06 17:34:07 -08001200 if (FILTER(ELF_HEADERS) &&
1201 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
Roland McGrath82df3972007-10-16 23:27:02 -07001202 u32 __user *header = (u32 __user *) vma->vm_start;
1203 u32 word;
Roland McGrath92dc07b2009-02-06 17:34:07 -08001204 mm_segment_t fs = get_fs();
Roland McGrath82df3972007-10-16 23:27:02 -07001205 /*
1206 * Doing it this way gets the constant folded by GCC.
1207 */
1208 union {
1209 u32 cmp;
1210 char elfmag[SELFMAG];
1211 } magic;
1212 BUILD_BUG_ON(SELFMAG != sizeof word);
1213 magic.elfmag[EI_MAG0] = ELFMAG0;
1214 magic.elfmag[EI_MAG1] = ELFMAG1;
1215 magic.elfmag[EI_MAG2] = ELFMAG2;
1216 magic.elfmag[EI_MAG3] = ELFMAG3;
Roland McGrath92dc07b2009-02-06 17:34:07 -08001217 /*
1218 * Switch to the user "segment" for get_user(),
1219 * then put back what elf_core_dump() had in place.
1220 */
1221 set_fs(USER_DS);
1222 if (unlikely(get_user(word, header)))
1223 word = 0;
1224 set_fs(fs);
1225 if (word == magic.cmp)
Roland McGrath82df3972007-10-16 23:27:02 -07001226 return PAGE_SIZE;
1227 }
1228
1229#undef FILTER
1230
1231 return 0;
1232
1233whole:
1234 return vma->vm_end - vma->vm_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235}
1236
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237/* An ELF note in memory */
1238struct memelfnote
1239{
1240 const char *name;
1241 int type;
1242 unsigned int datasz;
1243 void *data;
1244};
1245
1246static int notesize(struct memelfnote *en)
1247{
1248 int sz;
1249
1250 sz = sizeof(struct elf_note);
1251 sz += roundup(strlen(en->name) + 1, 4);
1252 sz += roundup(en->datasz, 4);
1253
1254 return sz;
1255}
1256
Andi Kleend025c9d2006-09-30 23:29:28 -07001257#define DUMP_WRITE(addr, nr, foffset) \
1258 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259
Andi Kleend025c9d2006-09-30 23:29:28 -07001260static int alignfile(struct file *file, loff_t *foffset)
1261{
Petr Vandroveca7a0d862006-10-13 18:42:07 +02001262 static const char buf[4] = { 0, };
Andi Kleend025c9d2006-09-30 23:29:28 -07001263 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1264 return 1;
1265}
1266
/* Emit one complete ELF note record: header, 4-byte-padded name, then
   4-byte-padded payload. Returns 1 on success, 0 on write failure
   (possibly via the DUMP_WRITE macro's hidden return). */
static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
/*
 * Variants used by the core dumper itself: they rely on 'size', 'limit'
 * and 'file' locals of the enclosing function and jump to its
 * end_coredump label when the dump would exceed the core rlimit or a
 * write/seek fails. NOTE(review): the enclosing elf_core_dump() is
 * outside this view -- confirm those names there.
 */
#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off) \
	if (!dump_seek(file, (off))) \
		goto end_coredump;
1293
Roland McGrath3aba4812008-01-30 13:31:44 +01001294static void fill_elf_header(struct elfhdr *elf, int segs,
1295 u16 machine, u32 flags, u8 osabi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296{
Cyrill Gorcunov6970c8e2008-04-29 01:01:18 -07001297 memset(elf, 0, sizeof(*elf));
1298
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1300 elf->e_ident[EI_CLASS] = ELF_CLASS;
1301 elf->e_ident[EI_DATA] = ELF_DATA;
1302 elf->e_ident[EI_VERSION] = EV_CURRENT;
1303 elf->e_ident[EI_OSABI] = ELF_OSABI;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304
1305 elf->e_type = ET_CORE;
Roland McGrath3aba4812008-01-30 13:31:44 +01001306 elf->e_machine = machine;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 elf->e_version = EV_CURRENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 elf->e_phoff = sizeof(struct elfhdr);
Roland McGrath3aba4812008-01-30 13:31:44 +01001309 elf->e_flags = flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 elf->e_ehsize = sizeof(struct elfhdr);
1311 elf->e_phentsize = sizeof(struct elf_phdr);
1312 elf->e_phnum = segs;
Cyrill Gorcunov6970c8e2008-04-29 01:01:18 -07001313
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 return;
1315}
1316
Andrew Morton8d6b5eee2006-09-25 23:32:04 -07001317static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318{
1319 phdr->p_type = PT_NOTE;
1320 phdr->p_offset = offset;
1321 phdr->p_vaddr = 0;
1322 phdr->p_paddr = 0;
1323 phdr->p_filesz = sz;
1324 phdr->p_memsz = 0;
1325 phdr->p_flags = 0;
1326 phdr->p_align = 0;
1327 return;
1328}
1329
1330static void fill_note(struct memelfnote *note, const char *name, int type,
1331 unsigned int sz, void *data)
1332{
1333 note->name = name;
1334 note->type = type;
1335 note->datasz = sz;
1336 note->data = data;
1337 return;
1338}
1339
1340/*
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001341 * fill up all the fields in prstatus from the given task struct, except
1342 * registers which need to be filled up separately.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 */
1344static void fill_prstatus(struct elf_prstatus *prstatus,
Jesper Juhlf4e5cc22006-06-23 02:05:35 -07001345 struct task_struct *p, long signr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346{
1347 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1348 prstatus->pr_sigpend = p->pending.signal.sig[0];
1349 prstatus->pr_sighold = p->blocked.sig[0];
Pavel Emelyanovb4888932007-10-18 23:40:14 -07001350 prstatus->pr_pid = task_pid_vnr(p);
Roland McGrath45626bb2008-01-07 14:22:44 -08001351 prstatus->pr_ppid = task_pid_vnr(p->real_parent);
Pavel Emelyanovb4888932007-10-18 23:40:14 -07001352 prstatus->pr_pgrp = task_pgrp_vnr(p);
1353 prstatus->pr_sid = task_session_vnr(p);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 if (thread_group_leader(p)) {
Frank Mayharf06febc2008-09-12 09:54:39 -07001355 struct task_cputime cputime;
1356
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 /*
Frank Mayharf06febc2008-09-12 09:54:39 -07001358 * This is the record for the group leader. It shows the
1359 * group-wide total, not its individual thread total.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 */
Frank Mayharf06febc2008-09-12 09:54:39 -07001361 thread_group_cputime(p, &cputime);
1362 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1363 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 } else {
1365 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1366 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1367 }
1368 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1369 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1370}
1371
/* Fill the NT_PRPSINFO note body: command line (from user memory),
   namespace-relative IDs, scheduling state and credentials.
   Returns 0 on success, -EFAULT if the argument area can't be read. */
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	/* Copy argv area, clamped to ELF_PRARGSZ-1 to leave room for NUL. */
	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	/* argv strings are NUL-separated in memory; join them with spaces. */
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	/* Map task state bit to the one-letter code used by ps. */
	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	/* Credentials are RCU-protected; snapshot uid/gid under the lock. */
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, cred->uid);
	SET_GID(psinfo->pr_gid, cred->gid);
	rcu_read_unlock();
	/* NOTE(review): strncpy may leave pr_fname unterminated when comm
	   fills the buffer exactly -- confirm pr_fname/comm sizes match. */
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}
1412
Roland McGrath3aba4812008-01-30 13:31:44 +01001413static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1414{
1415 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1416 int i = 0;
1417 do
1418 i += 2;
1419 while (auxv[i - 2] != AT_NULL);
1420 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1421}
1422
Roland McGrath4206d3a2008-01-30 13:31:45 +01001423#ifdef CORE_DUMP_USE_REGSET
1424#include <linux/regset.h>
1425
/* Per-thread data collected for a regset-based core dump. */
struct elf_thread_core_info {
	struct elf_thread_core_info *next;	/* singly linked; dumping task kept first */
	struct task_struct *task;
	struct elf_prstatus prstatus;		/* backing store for notes[0] (NT_PRSTATUS) */
	struct memelfnote notes[0];		/* one slot per regset with a core_note_type */
};
1432
/* Everything gathered by fill_note_info() for one core dump. */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* list of all threads in the group */
	struct memelfnote psinfo;		/* process-wide NT_PRPSINFO */
	struct memelfnote auxv;			/* process-wide NT_AUXV */
	size_t size;				/* total on-file size of all notes */
	int thread_notes;			/* note slots per thread (incl. prstatus) */
};
1440
Roland McGrathd31472b2008-03-04 14:28:30 -08001441/*
1442 * When a regset has a writeback hook, we call it on each thread before
1443 * dumping user memory. On register window machines, this makes sure the
1444 * user memory backing the register data is up to date before we read it.
1445 */
1446static void do_thread_regset_writeback(struct task_struct *task,
1447 const struct user_regset *regset)
1448{
1449 if (regset->writeback)
1450 regset->writeback(task, regset, 1);
1451}
1452
Roland McGrath4206d3a2008-01-30 13:31:45 +01001453static int fill_thread_core_info(struct elf_thread_core_info *t,
1454 const struct user_regset_view *view,
1455 long signr, size_t *total)
1456{
1457 unsigned int i;
1458
1459 /*
1460 * NT_PRSTATUS is the one special case, because the regset data
1461 * goes into the pr_reg field inside the note contents, rather
1462 * than being the whole note contents. We fill the reset in here.
1463 * We assume that regset 0 is NT_PRSTATUS.
1464 */
1465 fill_prstatus(&t->prstatus, t->task, signr);
1466 (void) view->regsets[0].get(t->task, &view->regsets[0],
1467 0, sizeof(t->prstatus.pr_reg),
1468 &t->prstatus.pr_reg, NULL);
1469
1470 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1471 sizeof(t->prstatus), &t->prstatus);
1472 *total += notesize(&t->notes[0]);
1473
Roland McGrathd31472b2008-03-04 14:28:30 -08001474 do_thread_regset_writeback(t->task, &view->regsets[0]);
1475
Roland McGrath4206d3a2008-01-30 13:31:45 +01001476 /*
1477 * Each other regset might generate a note too. For each regset
1478 * that has no core_note_type or is inactive, we leave t->notes[i]
1479 * all zero and we'll know to skip writing it later.
1480 */
1481 for (i = 1; i < view->n; ++i) {
1482 const struct user_regset *regset = &view->regsets[i];
Roland McGrathd31472b2008-03-04 14:28:30 -08001483 do_thread_regset_writeback(t->task, regset);
Roland McGrath4206d3a2008-01-30 13:31:45 +01001484 if (regset->core_note_type &&
1485 (!regset->active || regset->active(t->task, regset))) {
1486 int ret;
1487 size_t size = regset->n * regset->size;
1488 void *data = kmalloc(size, GFP_KERNEL);
1489 if (unlikely(!data))
1490 return 0;
1491 ret = regset->get(t->task, regset,
1492 0, size, data, NULL);
1493 if (unlikely(ret))
1494 kfree(data);
1495 else {
1496 if (regset->core_note_type != NT_PRFPREG)
1497 fill_note(&t->notes[i], "LINUX",
1498 regset->core_note_type,
1499 size, data);
1500 else {
1501 t->prstatus.pr_fpvalid = 1;
1502 fill_note(&t->notes[i], "CORE",
1503 NT_PRFPREG, size, data);
1504 }
1505 *total += notesize(&t->notes[i]);
1506 }
1507 }
1508 }
1509
1510 return 1;
1511}
1512
/* Gather every note for the core dump: the ELF header, one
   elf_thread_core_info per thread, and the process-wide psinfo and
   auxv notes. Returns 1 on success, 0 on failure; on failure the
   caller is expected to release partial state via free_note_info()
   (NOTE(review): caller not visible here -- confirm). */
static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	/* fill_note only stores the pointer, so recording a NULL psinfo
	   before the check below is harmless. */
	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	if (psinfo == NULL)
		return 0;

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}
1599
/*
 * Total size in bytes of all the notes to be written, as accumulated in
 * info->size while the notes were being filled in by fill_note_info()
 * and fill_thread_core_info().
 */
static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}
1604
1605/*
1606 * Write all the notes for each thread. When writing the first thread, the
1607 * process-wide notes are interleaved after the first thread-specific note.
1608 */
1609static int write_note_info(struct elf_note_info *info,
1610 struct file *file, loff_t *foffset)
1611{
1612 bool first = 1;
1613 struct elf_thread_core_info *t = info->thread;
1614
1615 do {
1616 int i;
1617
1618 if (!writenote(&t->notes[0], file, foffset))
1619 return 0;
1620
1621 if (first && !writenote(&info->psinfo, file, foffset))
1622 return 0;
1623 if (first && !writenote(&info->auxv, file, foffset))
1624 return 0;
1625
1626 for (i = 1; i < info->thread_notes; ++i)
1627 if (t->notes[i].data &&
1628 !writenote(&t->notes[i], file, foffset))
1629 return 0;
1630
1631 first = 0;
1632 t = t->next;
1633 } while (t);
1634
1635 return 1;
1636}
1637
1638static void free_note_info(struct elf_note_info *info)
1639{
1640 struct elf_thread_core_info *threads = info->thread;
1641 while (threads) {
1642 unsigned int i;
1643 struct elf_thread_core_info *t = threads;
1644 threads = t->next;
1645 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1646 for (i = 1; i < info->thread_notes; ++i)
1647 kfree(t->notes[i].data);
1648 kfree(t);
1649 }
1650 kfree(info->psinfo.data);
1651}
1652
1653#else
1654
/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;		/* link in elf_note_info.thread_list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;	/* thread this status describes */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];	/* filled by elf_dump_thread_status() */
	int num_notes;			/* how many of notes[] are valid */
};
1668
/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 *
 * Fills t->notes[] / t->num_notes from the thread's registers and returns
 * the total size in bytes that those notes will occupy in the core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	/* NT_PRSTATUS: general-purpose registers and signal info. */
	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	/* NT_PRFPREG only if the thread actually has FPU state to dump. */
	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	/* Arch-specific extended FP state, when the arch provides it. */
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}
1706
/*
 * Bookkeeping for the non-regset core-dump path: the process-wide notes
 * for current plus a list of per-thread status records.
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of process-wide notes */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* struct elf_thread_status entries */
	elf_fpregset_t *fpu;		/* NT_PRFPREG for current */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE for current */
#endif
	int thread_status_size;		/* total bytes of per-thread notes */
	int numnote;			/* valid entries in notes[] */
};
1719
/*
 * Collect everything the core dump needs besides memory contents:
 * a status note for every other thread sharing the mm, then the
 * prstatus/psinfo/auxv/FPU notes for current.  Also initializes the
 * ELF file header.
 *
 * Returns 1 on success, 0 on failure.  On failure the caller is
 * expected to clean up the partial allocations via free_note_info().
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
		struct elf_note_info *info,
		long signr, struct pt_regs *regs)
{
/* Upper bound on entries in info->notes (prstatus, psinfo, auxv, fpu, xfpu). */
#define NUM_NOTES 6
	struct list_head *t;

	/* NULL everything first so free_note_info() is safe at any point. */
	info->notes = NULL;
	info->prstatus = NULL;
	info->psinfo = NULL;
	info->fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = NULL;
#endif
	INIT_LIST_HEAD(&info->thread_list);

	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
			      GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif

	info->thread_status_size = 0;
	if (signr) {
		/*
		 * Dumping on a fatal signal: walk the core_state dumper
		 * list (every other thread blocked for this dump) and
		 * build a status record for each.
		 */
		struct core_thread *ct;
		struct elf_thread_status *ets;

		for (ct = current->mm->core_state->dumper.next;
		     ct; ct = ct->next) {
			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
			if (!ets)
				return 0;

			ets->thread = ct->task;
			list_add(&ets->list, &info->thread_list);
		}

		/* Fill each thread's notes and total up their sizes. */
		list_for_each(t, &info->thread_list) {
			int sz;

			ets = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, ets);
			info->thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, signr);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	info->numnote = 2;

	fill_auxv_note(&info->notes[info->numnote++], current->mm);

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;

#undef NUM_NOTES
}
1818
1819static size_t get_note_info_size(struct elf_note_info *info)
1820{
1821 int sz = 0;
1822 int i;
1823
1824 for (i = 0; i < info->numnote; i++)
1825 sz += notesize(info->notes + i);
1826
1827 sz += info->thread_status_size;
1828
1829 return sz;
1830}
1831
1832static int write_note_info(struct elf_note_info *info,
1833 struct file *file, loff_t *foffset)
1834{
1835 int i;
1836 struct list_head *t;
1837
1838 for (i = 0; i < info->numnote; i++)
1839 if (!writenote(info->notes + i, file, foffset))
1840 return 0;
1841
1842 /* write out the thread status notes section */
1843 list_for_each(t, &info->thread_list) {
1844 struct elf_thread_status *tmp =
1845 list_entry(t, struct elf_thread_status, list);
1846
1847 for (i = 0; i < tmp->num_notes; i++)
1848 if (!writenote(&tmp->notes[i], file, foffset))
1849 return 0;
1850 }
1851
1852 return 1;
1853}
1854
1855static void free_note_info(struct elf_note_info *info)
1856{
1857 while (!list_empty(&info->thread_list)) {
1858 struct list_head *tmp = info->thread_list.next;
1859 list_del(tmp);
1860 kfree(list_entry(tmp, struct elf_thread_status, list));
1861 }
1862
1863 kfree(info->prstatus);
1864 kfree(info->psinfo);
1865 kfree(info->notes);
1866 kfree(info->fpu);
1867#ifdef ELF_CORE_COPY_XFPREGS
1868 kfree(info->xfpu);
1869#endif
1870}
1871
Roland McGrath4206d3a2008-01-30 13:31:45 +01001872#endif
1873
Roland McGrathf47aef52007-01-26 00:56:49 -08001874static struct vm_area_struct *first_vma(struct task_struct *tsk,
1875 struct vm_area_struct *gate_vma)
1876{
1877 struct vm_area_struct *ret = tsk->mm->mmap;
1878
1879 if (ret)
1880 return ret;
1881 return gate_vma;
1882}
1883/*
1884 * Helper function for iterating across a vma list. It ensures that the caller
1885 * will visit `gate_vma' prior to terminating the search.
1886 */
1887static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1888 struct vm_area_struct *gate_vma)
1889{
1890 struct vm_area_struct *ret;
1891
1892 ret = this_vma->vm_next;
1893 if (ret)
1894 return ret;
1895 if (this_vma == gate_vma)
1896 return NULL;
1897 return gate_vma;
1898}
1899
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * Returns non-zero if anything was dumped.
 *
 * NOTE(review): DUMP_WRITE()/DUMP_SEEK() are macros defined earlier in
 * this file; they presumably bail out to end_coredump on failure —
 * confirm against their definitions before restructuring this function.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;		/* bytes of segment data written so far */
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long mm_flags;
	struct elf_note_info info;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	/* The gate vma (e.g. vsyscall page) gets its own segment if present. */
	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, segs + 1, /* including notes section */
			    &info, signr, regs))
		goto cleanup;

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/* KERNEL_DS so dump_write() can take kernel buffers. */
	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr);	/* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	/* Segment data starts on the next page boundary. */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/*
	 * We must use the same mm->flags while dumping core to avoid
	 * inconsistency between the program headers and bodies, otherwise an
	 * unusable core file can be generated.
	 */
	mm_flags = current->mm->flags;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		/* p_filesz may be less than p_memsz: filtered vmas dump 0. */
		phdr.p_filesz = vma_dump_size(vma, mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	if (!write_note_info(&info, file, &foffset))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(file, &foffset))
		goto end_coredump;

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	/* Second pass: write the memory contents, page by page. */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_dump_size(vma, mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *tmp_vma;

			/* args: 1 page, write=0, force=1 — see get_user_pages() */
			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &tmp_vma) <= 0) {
				/* Unmapped page: emit a hole instead. */
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(0)) {
					/* All-zero page: seek, don't write. */
					if (!dump_seek(file, PAGE_SIZE)) {
						page_cache_release(page);
						goto end_coredump;
					}
				} else {
					void *kaddr;
					flush_cache_page(tmp_vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					/* Truncate once the core limit is hit. */
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
							PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(elf);
out:
	return has_dumped;
}
2073
2074#endif /* USE_ELF_CORE_DUMP */
2075
/* Register the ELF loader with the binfmt core at boot. */
static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}
2080
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2086
2087core_initcall(init_elf_binfmt);
2088module_exit(exit_elf_binfmt);
2089MODULE_LICENSE("GPL");