x86, vdso: Move the 32-bit vdso special pages after the text

This unifies the vdso mapping code and teaches it how to map special
pages at addresses corresponding to symbols in the vdso image.  The
new code is used for all vdso variants, but so far only the 32-bit
variants use the new vvar page position.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/b6d7858ad7b5ac3fd3c29cab6d6d769bc45d195e.1399317206.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index cf21762..e915eae 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -15,6 +15,7 @@
 #include <asm/proto.h>
 #include <asm/vdso.h>
 #include <asm/page.h>
+#include <asm/hpet.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -36,7 +37,6 @@
 						image->alt_len));
 }
 
-
 #if defined(CONFIG_X86_64)
 static int __init init_vdso(void)
 {
@@ -49,13 +49,16 @@
 	return 0;
 }
 subsys_initcall(init_vdso);
+#endif
 
 struct linux_binprm;
 
 /* Put the vdso above the (randomized) stack with another randomized offset.
    This way there is no hole in the middle of address space.
    To save memory make sure it is still in the same PTE as the stack top.
-   This doesn't give that many random bits */
+   This doesn't give that many random bits.
+
+   Only used for the 64-bit and x32 vdsos. */
 static unsigned long vdso_addr(unsigned long start, unsigned len)
 {
 	unsigned long addr, end;
@@ -81,23 +84,23 @@
 	return addr;
 }
 
-/* Setup a VMA at program startup for the vsyscall page.
-   Not called for compat tasks */
-static int setup_additional_pages(struct linux_binprm *bprm,
-				  int uses_interp,
-				  struct page **pages,
-				  unsigned size)
+static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
-	int ret;
+	int ret = 0;
 
-	if (!vdso64_enabled)
-		return 0;
+	if (calculate_addr) {
+		addr = vdso_addr(current->mm->start_stack,
+				 image->sym_end_mapping);
+	} else {
+		addr = 0;
+	}
 
 	down_write(&mm->mmap_sem);
-	addr = vdso_addr(mm->start_stack, size);
-	addr = get_unmapped_area(NULL, addr, size, 0, 0);
+
+	addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
@@ -105,34 +108,115 @@
 
 	current->mm->context.vdso = (void __user *)addr;
 
-	ret = install_special_mapping(mm, addr, size,
+	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 */
+	ret = install_special_mapping(mm,
+				      addr,
+				      image->size,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      pages);
-	if (ret) {
-		current->mm->context.vdso = NULL;
+				      image->pages);
+
+	if (ret)
+		goto up_fail;
+
+	vma = _install_special_mapping(mm,
+				       addr + image->size,
+				       image->sym_end_mapping - image->size,
+				       VM_READ,
+				       NULL);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto up_fail;
 	}
 
+	if (image->sym_vvar_page)
+		ret = remap_pfn_range(vma,
+				      addr + image->sym_vvar_page,
+				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
+				      PAGE_SIZE,
+				      PAGE_READONLY);
+
+	if (ret)
+		goto up_fail;
+
+#ifdef CONFIG_HPET_TIMER
+	if (hpet_address && image->sym_hpet_page) {
+		ret = io_remap_pfn_range(vma,
+			addr + image->sym_hpet_page,
+			hpet_address >> PAGE_SHIFT,
+			PAGE_SIZE,
+			pgprot_noncached(PAGE_READONLY));
+
+		if (ret)
+			goto up_fail;
+	}
+#endif
+
 up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
 	up_write(&mm->mmap_sem);
 	return ret;
 }
 
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+static int load_vdso32(void)
 {
-	return setup_additional_pages(bprm, uses_interp, vdso_image_64.pages,
-				      vdso_image_64.size);
-}
+	int ret;
 
-#ifdef CONFIG_X86_X32_ABI
-int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
-	return setup_additional_pages(bprm, uses_interp, vdso_image_x32.pages,
-				      vdso_image_x32.size);
+	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
+		return 0;
+
+	ret = map_vdso(selected_vdso32, false);
+	if (ret)
+		return ret;
+
+	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
+		current_thread_info()->sysenter_return =
+			current->mm->context.vdso +
+			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
+
+	return 0;
 }
 #endif
 
+#ifdef CONFIG_X86_64
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	if (!vdso64_enabled)
+		return 0;
+
+	return map_vdso(&vdso_image_64, true);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp)
+{
+#ifdef CONFIG_X86_X32_ABI
+	if (test_thread_flag(TIF_X32)) {
+		if (!vdso64_enabled)
+			return 0;
+
+		return map_vdso(&vdso_image_x32, true);
+	}
+#endif
+
+	return load_vdso32();
+}
+#endif
+#else
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	return load_vdso32();
+}
+#endif
+
+#ifdef CONFIG_X86_64
 static __init int vdso_setup(char *s)
 {
 	vdso64_enabled = simple_strtoul(s, NULL, 0);