Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | * (C) Copyright 2002 Linus Torvalds |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 3 | * Portions based on the vdso-randomization code from exec-shield: |
| 4 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 5 | * |
| 6 | * This file contains the needed initializations to support sysenter. |
| 7 | */ |
| 8 | |
| 9 | #include <linux/init.h> |
| 10 | #include <linux/smp.h> |
| 11 | #include <linux/thread_info.h> |
| 12 | #include <linux/sched.h> |
| 13 | #include <linux/gfp.h> |
| 14 | #include <linux/string.h> |
| 15 | #include <linux/elf.h> |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 16 | #include <linux/mm.h> |
Alexey Dobriyan | 4e950f6 | 2007-07-30 02:36:13 +0400 | [diff] [blame] | 17 | #include <linux/err.h> |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 18 | #include <linux/module.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 19 | |
| 20 | #include <asm/cpufeature.h> |
| 21 | #include <asm/msr.h> |
| 22 | #include <asm/pgtable.h> |
| 23 | #include <asm/unistd.h> |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 24 | #include <asm/elf.h> |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 25 | #include <asm/tlbflush.h> |
Roland McGrath | 6c3652e | 2008-01-30 13:30:42 +0100 | [diff] [blame] | 26 | #include <asm/vdso.h> |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 27 | |
/*
 * Numeric modes stored in vdso_enabled.  VDSO_COMPAT selects the
 * fixed-address mapping at VDSO_HIGH_BASE; any other value makes
 * arch_setup_additional_pages() pick a free address instead.
 */
enum {
	VDSO_DISABLED	= 0,
	VDSO_ENABLED	= 1,
	VDSO_COMPAT	= 2,
};

/* Built-in default mode: compat when CONFIG_COMPAT_VDSO is configured. */
#if defined(CONFIG_COMPAT_VDSO)
# define VDSO_DEFAULT	VDSO_COMPAT
#else
# define VDSO_DEFAULT	VDSO_ENABLED
#endif
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 39 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 40 | /* |
| 41 | * Should the kernel map a VDSO page into processes and pass its |
| 42 | * address down to glibc upon exec()? |
| 43 | */ |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 44 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 45 | |
| 46 | EXPORT_SYMBOL_GPL(vdso_enabled); |
| 47 | |
| 48 | static int __init vdso_setup(char *s) |
| 49 | { |
| 50 | vdso_enabled = simple_strtoul(s, NULL, 0); |
| 51 | |
| 52 | return 1; |
| 53 | } |
| 54 | |
| 55 | __setup("vdso=", vdso_setup); |
| 56 | |
Roland McGrath | 0aa97fb22 | 2008-01-30 13:30:43 +0100 | [diff] [blame^] | 57 | extern asmlinkage void ia32_sysenter_target(void); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 58 | |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 59 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, |
| 60 | unsigned offset, unsigned size) |
| 61 | { |
| 62 | Elf32_Sym *sym = (void *)ehdr + offset; |
| 63 | unsigned nsym = size / sizeof(*sym); |
| 64 | unsigned i; |
| 65 | |
| 66 | for(i = 0; i < nsym; i++, sym++) { |
| 67 | if (sym->st_shndx == SHN_UNDEF || |
| 68 | sym->st_shndx == SHN_ABS) |
| 69 | continue; /* skip */ |
| 70 | |
| 71 | if (sym->st_shndx > SHN_LORESERVE) { |
| 72 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", |
| 73 | sym->st_shndx); |
| 74 | continue; |
| 75 | } |
| 76 | |
| 77 | switch(ELF_ST_TYPE(sym->st_info)) { |
| 78 | case STT_OBJECT: |
| 79 | case STT_FUNC: |
| 80 | case STT_SECTION: |
| 81 | case STT_FILE: |
| 82 | sym->st_value += VDSO_HIGH_BASE; |
| 83 | } |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) |
| 88 | { |
| 89 | Elf32_Dyn *dyn = (void *)ehdr + offset; |
| 90 | |
| 91 | for(; dyn->d_tag != DT_NULL; dyn++) |
| 92 | switch(dyn->d_tag) { |
| 93 | case DT_PLTGOT: |
| 94 | case DT_HASH: |
| 95 | case DT_STRTAB: |
| 96 | case DT_SYMTAB: |
| 97 | case DT_RELA: |
| 98 | case DT_INIT: |
| 99 | case DT_FINI: |
| 100 | case DT_REL: |
| 101 | case DT_DEBUG: |
| 102 | case DT_JMPREL: |
| 103 | case DT_VERSYM: |
| 104 | case DT_VERDEF: |
| 105 | case DT_VERNEED: |
| 106 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: |
| 107 | /* definitely pointers needing relocation */ |
| 108 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; |
| 109 | break; |
| 110 | |
| 111 | case DT_ENCODING ... OLD_DT_LOOS-1: |
| 112 | case DT_LOOS ... DT_HIOS-1: |
| 113 | /* Tags above DT_ENCODING are pointers if |
| 114 | they're even */ |
| 115 | if (dyn->d_tag >= DT_ENCODING && |
| 116 | (dyn->d_tag & 1) == 0) |
| 117 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; |
| 118 | break; |
| 119 | |
| 120 | case DT_VERDEFNUM: |
| 121 | case DT_VERNEEDNUM: |
| 122 | case DT_FLAGS_1: |
| 123 | case DT_RELACOUNT: |
| 124 | case DT_RELCOUNT: |
| 125 | case DT_VALRNGLO ... DT_VALRNGHI: |
| 126 | /* definitely not pointers */ |
| 127 | break; |
| 128 | |
| 129 | case OLD_DT_LOOS ... DT_LOOS-1: |
| 130 | case DT_HIOS ... DT_VALRNGLO-1: |
| 131 | default: |
| 132 | if (dyn->d_tag > DT_ENCODING) |
| 133 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", |
| 134 | dyn->d_tag); |
| 135 | break; |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) |
| 140 | { |
| 141 | Elf32_Phdr *phdr; |
| 142 | Elf32_Shdr *shdr; |
| 143 | int i; |
| 144 | |
| 145 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || |
| 146 | !elf_check_arch(ehdr) || |
| 147 | ehdr->e_type != ET_DYN); |
| 148 | |
| 149 | ehdr->e_entry += VDSO_HIGH_BASE; |
| 150 | |
| 151 | /* rebase phdrs */ |
| 152 | phdr = (void *)ehdr + ehdr->e_phoff; |
| 153 | for (i = 0; i < ehdr->e_phnum; i++) { |
| 154 | phdr[i].p_vaddr += VDSO_HIGH_BASE; |
| 155 | |
| 156 | /* relocate dynamic stuff */ |
| 157 | if (phdr[i].p_type == PT_DYNAMIC) |
| 158 | reloc_dyn(ehdr, phdr[i].p_offset); |
| 159 | } |
| 160 | |
| 161 | /* rebase sections */ |
| 162 | shdr = (void *)ehdr + ehdr->e_shoff; |
| 163 | for(i = 0; i < ehdr->e_shnum; i++) { |
| 164 | if (!(shdr[i].sh_flags & SHF_ALLOC)) |
| 165 | continue; |
| 166 | |
| 167 | shdr[i].sh_addr += VDSO_HIGH_BASE; |
| 168 | |
| 169 | if (shdr[i].sh_type == SHT_SYMTAB || |
| 170 | shdr[i].sh_type == SHT_DYNSYM) |
| 171 | reloc_symtab(ehdr, shdr[i].sh_offset, |
| 172 | shdr[i].sh_size); |
| 173 | } |
| 174 | } |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 175 | |
Li Shaohua | 6fe940d | 2005-06-25 14:54:53 -0700 | [diff] [blame] | 176 | void enable_sep_cpu(void) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 177 | { |
| 178 | int cpu = get_cpu(); |
| 179 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
| 180 | |
Li Shaohua | 6fe940d | 2005-06-25 14:54:53 -0700 | [diff] [blame] | 181 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
| 182 | put_cpu(); |
| 183 | return; |
| 184 | } |
| 185 | |
Rusty Russell | a75c54f | 2007-05-02 19:27:13 +0200 | [diff] [blame] | 186 | tss->x86_tss.ss1 = __KERNEL_CS; |
| 187 | tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 188 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
Rusty Russell | a75c54f | 2007-05-02 19:27:13 +0200 | [diff] [blame] | 189 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); |
Roland McGrath | 0aa97fb22 | 2008-01-30 13:30:43 +0100 | [diff] [blame^] | 190 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 191 | put_cpu(); |
| 192 | } |
| 193 | |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 194 | static struct vm_area_struct gate_vma; |
| 195 | |
| 196 | static int __init gate_vma_init(void) |
| 197 | { |
| 198 | gate_vma.vm_mm = NULL; |
| 199 | gate_vma.vm_start = FIXADDR_USER_START; |
| 200 | gate_vma.vm_end = FIXADDR_USER_END; |
| 201 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; |
| 202 | gate_vma.vm_page_prot = __P101; |
| 203 | /* |
| 204 | * Make sure the vDSO gets into every core dump. |
| 205 | * Dumping its contents makes post-mortem fully interpretable later |
| 206 | * without matching up the same kernel and hardware config to see |
| 207 | * what PC values meant. |
| 208 | */ |
| 209 | gate_vma.vm_flags |= VM_ALWAYSDUMP; |
| 210 | return 0; |
| 211 | } |
| 212 | |
/*
 * These symbols are defined by vsyscall.o to mark the bounds
 * of the ELF DSO images included therein: one image for the int80
 * variant and one for the sysenter variant.
 */
extern const char vsyscall_int80_start;
extern const char vsyscall_int80_end;
extern const char vsyscall_sysenter_start;
extern const char vsyscall_sysenter_end;

/* The one page that receives the selected image in sysenter_setup(). */
static struct page *syscall_pages[1];
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 220 | |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 221 | static void map_compat_vdso(int map) |
| 222 | { |
| 223 | static int vdso_mapped; |
| 224 | |
| 225 | if (map == vdso_mapped) |
| 226 | return; |
| 227 | |
| 228 | vdso_mapped = map; |
| 229 | |
| 230 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, |
| 231 | map ? PAGE_READONLY_EXEC : PAGE_NONE); |
| 232 | |
| 233 | /* flush stray tlbs */ |
| 234 | flush_tlb_all(); |
| 235 | } |
| 236 | |
Jeremy Fitzhardinge | a6c4e07 | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 237 | int __init sysenter_setup(void) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 238 | { |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 239 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 240 | const void *vsyscall; |
| 241 | size_t vsyscall_len; |
| 242 | |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 243 | syscall_pages[0] = virt_to_page(syscall_page); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 244 | |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 245 | gate_vma_init(); |
| 246 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 247 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 248 | |
| 249 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 250 | vsyscall = &vsyscall_int80_start; |
| 251 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
| 252 | } else { |
| 253 | vsyscall = &vsyscall_sysenter_start; |
| 254 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 255 | } |
| 256 | |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 257 | memcpy(syscall_page, vsyscall, vsyscall_len); |
| 258 | relocate_vdso(syscall_page); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 259 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 260 | return 0; |
| 261 | } |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 262 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 263 | /* Setup a VMA at program startup for the vsyscall page */ |
| 264 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) |
| 265 | { |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 266 | struct mm_struct *mm = current->mm; |
| 267 | unsigned long addr; |
Zachary Amsden | 752783c | 2007-05-02 19:27:16 +0200 | [diff] [blame] | 268 | int ret = 0; |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 269 | bool compat; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 270 | |
| 271 | down_write(&mm->mmap_sem); |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 272 | |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 273 | /* Test compat mode once here, in case someone |
| 274 | changes it via sysctl */ |
| 275 | compat = (vdso_enabled == VDSO_COMPAT); |
| 276 | |
| 277 | map_compat_vdso(compat); |
| 278 | |
| 279 | if (compat) |
| 280 | addr = VDSO_HIGH_BASE; |
| 281 | else { |
| 282 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
| 283 | if (IS_ERR_VALUE(addr)) { |
| 284 | ret = addr; |
| 285 | goto up_fail; |
| 286 | } |
| 287 | |
| 288 | /* |
| 289 | * MAYWRITE to allow gdb to COW and set breakpoints |
| 290 | * |
| 291 | * Make sure the vDSO gets into every core dump. |
| 292 | * Dumping its contents makes post-mortem fully |
| 293 | * interpretable later without matching up the same |
| 294 | * kernel and hardware config to see what PC values |
| 295 | * meant. |
| 296 | */ |
| 297 | ret = install_special_mapping(mm, addr, PAGE_SIZE, |
| 298 | VM_READ|VM_EXEC| |
| 299 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| |
| 300 | VM_ALWAYSDUMP, |
| 301 | syscall_pages); |
| 302 | |
| 303 | if (ret) |
| 304 | goto up_fail; |
| 305 | } |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 306 | |
| 307 | current->mm->context.vdso = (void *)addr; |
| 308 | current_thread_info()->sysenter_return = |
Roland McGrath | 6c3652e | 2008-01-30 13:30:42 +0100 | [diff] [blame] | 309 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 310 | |
| 311 | up_fail: |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 312 | up_write(&mm->mmap_sem); |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 313 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 314 | return ret; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 315 | } |
| 316 | |
| 317 | const char *arch_vma_name(struct vm_area_struct *vma) |
| 318 | { |
| 319 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) |
| 320 | return "[vdso]"; |
| 321 | return NULL; |
| 322 | } |
| 323 | |
| 324 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
| 325 | { |
Jeremy Fitzhardinge | 1dbf527c | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 326 | struct mm_struct *mm = tsk->mm; |
| 327 | |
| 328 | /* Check to see if this task was created in compat vdso mode */ |
| 329 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) |
| 330 | return &gate_vma; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 331 | return NULL; |
| 332 | } |
| 333 | |
| 334 | int in_gate_area(struct task_struct *task, unsigned long addr) |
| 335 | { |
Jan Beulich | 2f30c00 | 2007-07-21 17:10:21 +0200 | [diff] [blame] | 336 | const struct vm_area_struct *vma = get_gate_vma(task); |
| 337 | |
| 338 | return vma && addr >= vma->vm_start && addr < vma->vm_end; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 339 | } |
| 340 | |
/*
 * Task-less variant of in_gate_area(): here it reports "not in a gate
 * area" (0) for every @addr.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}