/*
 * Set up the VMAs to tell the VM about the vDSO.
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;

extern unsigned short vdso_sync_cpuid;
#endif

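/*
 * Prepare a vDSO image for mapping: point its text_mapping page array at
 * the pages backing the image and patch the kernel's instruction
 * alternatives into the image data.
 */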
void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = (image->size) / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

#if defined(CONFIG_X86_64)
static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	return 0;
}
subsys_initcall(init_vdso);
#endif

struct linux_binprm;

/* Put the vdso above the (randomized) stack with another randomized offset.
   This way there is no hole in the middle of address space.
   To save memory make sure it is still in the same PTE as the stack top.
   This doesn't give that many random bits.

   Only used for the 64-bit and x32 vdsos. */
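/*
 * Back-of-the-envelope check on the randomization below (assuming 4K
 * pages and PTRS_PER_PTE == 512 on x86-64): the random offset is at most
 * 511 pages, i.e. just under PMD_SIZE (2 MB), so the vdso stays within
 * the page table that already covers the stack top.
 */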
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
	return 0;
#else
	unsigned long addr, end;
	unsigned offset;
	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;
	/* This loses some more bits than a modulo, but is cheaper */
	offset = get_random_int() & (PTRS_PER_PTE - 1);
	addr = start + (offset << PAGE_SHIFT);
	if (addr >= end)
		addr = end;

	/*
	 * page-align it here so that get_unmapped_area doesn't
	 * align it wrongfully again to the next page. addr can come in 4K
	 * unaligned here as a result of stack start randomization.
	 */
	addr = PAGE_ALIGN(addr);
	addr = align_vdso_addr(addr);

	return addr;
#endif
}

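/*
 * Map the given vDSO image into the current process.  The vvar area (and
 * HPET page, when present) sits immediately below the vDSO text:
 * image->sym_vvar_start is a negative offset from the text, so the whole
 * mapping spans image->size - image->sym_vvar_start bytes.
 */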
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

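	/*
	 * Expose the kernel's __vvar_page read-only inside the [vvar] area
	 * so the vDSO (e.g. clock_gettime()) can read the vsyscall data
	 * without entering the kernel.
	 */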
	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

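	/*
	 * If an HPET is present, map its MMIO page uncached and read-only
	 * so the vDSO can read the HPET main counter directly from user
	 * space.
	 */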
#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
			text_start + image->sym_hpet_page,
			hpet_address >> PAGE_SHIFT,
			PAGE_SIZE,
			pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
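/*
 * Map the 32-bit vDSO image selected at boot and, when the image provides
 * the symbol, record the user-space address of VDSO32_SYSENTER_RETURN so
 * the sysenter entry path knows where to return to.
 */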
static int load_vdso32(void)
{
	int ret;

	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	ret = map_vdso(selected_vdso32, false);
	if (ret)
		return ret;

	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;

	return 0;
}
#endif

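/*
 * arch_setup_additional_pages() is called by the ELF loader at exec time
 * to add the vDSO mapping to the new process image.
 */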
#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;

		return map_vdso(&vdso_image_x32, true);
	}
#endif

	return load_vdso32();
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

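/* Parse the "vdso=" boot parameter: "vdso=0" disables the 64-bit vDSO. */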
#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
/*
 * Assume __initcall executes before all user space. Hopefully kmod
 * doesn't violate that. We'll find out if it does.
 */
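/*
 * Fill in the GDT_ENTRY_PER_CPU descriptor for this CPU: the CPU and node
 * numbers are packed into the segment limit (and into MSR_TSC_AUX via
 * RDTSCP where available) so the vDSO's getcpu() can recover them from
 * user space without a system call.
 */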
static void vsyscall_set_cpu(int cpu)
{
	unsigned long d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store cpu number in limit so that it can be loaded quickly
	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
	 */
	d = 0x0f40000000000ULL;
	d |= cpu;
	d |= (node & 0xf) << 12;
	d |= (node >> 4) << 48;

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

static void cpu_vsyscall_init(void *arg)
{
	/* preemption should be already off */
	vsyscall_set_cpu(raw_smp_processor_id());
}

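/* Re-program the per-CPU descriptor whenever a CPU comes online. */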
static int
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);

	return NOTIFY_DONE;
}

static int __init vsyscall_init(void)
{
	cpu_notifier_register_begin();

	on_each_cpu(cpu_vsyscall_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(cpu_vsyscall_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
__initcall(vsyscall_init);
#endif