/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

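/*
 * Populate the image's page array so the text mapping can be installed
 * later, and apply any boot-time instruction alternatives to the image.
 */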
void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = (image->size) / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

struct linux_binprm;

/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of the address space.
 * To save memory, make sure it is still in the same PTE as the stack top.
 * This doesn't give many random bits.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
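/*
 * A worked example, assuming 4K pages (PAGE_SHIFT == 12, so
 * PTRS_PER_PTE == 512): the random offset below is 0-511 pages, i.e.
 * at most 2MB minus one page above the stack top, which keeps the
 * vdso inside the single PMD-sized region that "end" is rounded to.
 */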
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
	return 0;
#else
	unsigned long addr, end;
	unsigned offset;
	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;
	/* This loses some more bits than a modulo, but is cheaper */
	offset = get_random_int() & (PTRS_PER_PTE - 1);
	addr = start + (offset << PAGE_SHIFT);
	if (addr >= end)
		addr = end;

	/*
	 * Page-align it here so that get_unmapped_area doesn't
	 * align it wrongly again to the next page. addr can come in 4K
	 * unaligned here as a result of stack start randomization.
	 */
	addr = PAGE_ALIGN(addr);
	addr = align_vdso_addr(addr);

	return addr;
#endif
}

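/*
 * Map a vDSO image into the current process.  The vvar area sits in
 * front of the vdso text, so image->sym_vvar_start is negative: the
 * total mapping is image->size - image->sym_vvar_start bytes, and the
 * text begins -image->sym_vvar_start bytes into the mapping.
 */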
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

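	/*
	 * If the image references the HPET, also expose the HPET counter
	 * page to user space: read-only and uncached, since it is MMIO.
	 * This lets vDSO clock reads sample the HPET without a syscall.
	 */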
#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
					 text_start + image->sym_hpet_page,
					 hpet_address >> PAGE_SHIFT,
					 PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
static int load_vdso32(void)
{
	int ret;

	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	ret = map_vdso(selected_vdso32, false);
	if (ret)
		return ret;

	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;

	return 0;
}
#endif

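/*
 * arch_setup_additional_pages() is called by the ELF loader for each
 * new process image; it maps the vDSO variant that matches the binary.
 */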
#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;

		return map_vdso(&vdso_image_x32, true);
	}
#endif

	return load_vdso32();
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
static void vgetcpu_cpu_init(void *arg)
{
	int cpu = smp_processor_id();
	struct desc_struct d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store the CPU number in the segment limit so that it can be
	 * loaded quickly in user space via vgetcpu (12 bits for the CPU
	 * and 8 bits for the node).
	 */
	d = (struct desc_struct) {
		.limit0 = cpu | ((node & 0xf) << 12),
		.limit = node >> 4,
		.type = 5,	/* RO data, expand down, accessed */
		.dpl = 3,	/* Visible to user code */
		.s = 1,		/* Not a system segment */
		.p = 1,		/* Present */
		.d = 1,		/* 32-bit */
	};

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

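/*
 * For reference, a minimal sketch of how user space reads the segment
 * set up above (the real reader is the vDSO/vsyscall vgetcpu code,
 * which uses the __PER_CPU_SEG selector, i.e. GDT_ENTRY_PER_CPU*8 + 3):
 *
 *	unsigned int p;
 *	asm ("lsl %1, %0" : "=r" (p) : "r" (GDT_ENTRY_PER_CPU * 8 + 3));
 *	cpu  = p & 0xfff;	(12 bits of CPU number)
 *	node = p >> 12;		(8 bits of node number)
 */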
static int
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);

	return NOTIFY_DONE;
}

static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	cpu_notifier_register_begin();

	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */