x86: begin fault_{32|64}.c unification
Move the X86_32-only get_segment_eip to X86_64
Move the X86_64-only is_errata93 to X86_32
Change the X86_32 loop in is_prefetch to highlight the differences
between the 32-bit and 64-bit versions. Fold the logic from
__is_prefetch in as well on X86_32.
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 7e98a76..0d3d597 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -64,32 +64,136 @@
#endif
}
-/* Sometimes the CPU reports invalid exceptions on prefetch.
- Check that here and ignore.
- Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
+#ifdef CONFIG_X86_32
+/*
+ * Return EIP plus the CS segment base. The segment limit is also
+ * adjusted, clamped to the kernel/user address space (whichever is
+ * appropriate), and returned in *eip_limit.
+ *
+ * The segment is checked, because it might have been changed by another
+ * task between the original faulting instruction and here.
+ *
+ * If CS is no longer a valid code segment, or if EIP is beyond the
+ * limit, or if it is a kernel address when CS is not a kernel segment,
+ * then the returned value will be greater than *eip_limit.
+ *
+ * This is slow, but is very rarely executed.
+ */
+static inline unsigned long get_segment_eip(struct pt_regs *regs,
+ unsigned long *eip_limit)
+{
+ unsigned long ip = regs->ip;
+ unsigned seg = regs->cs & 0xffff;
+ u32 seg_ar, seg_limit, base, *desc;
+
+ /* Unlikely, but must come before segment checks. */
+ if (unlikely(regs->flags & VM_MASK)) {
+ base = seg << 4;
+ *eip_limit = base + 0xffff;
+ return base + (ip & 0xffff);
+ }
+
+ /* The standard kernel/user address space limit. */
+ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+
+ /* By far the most common cases. */
+ if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+ return ip;
+
+ /* Check the segment exists, is within the current LDT/GDT size,
+ that kernel/user (ring 0..3) has the appropriate privilege,
+ that it's a code segment, and get the limit. */
+ __asm__("larl %3,%0; lsll %3,%1"
+ : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg));
+ if ((~seg_ar & 0x9800) || ip > seg_limit) {
+ *eip_limit = 0;
+ return 1; /* So that returned ip > *eip_limit. */
+ }
+
+ /* Get the GDT/LDT descriptor base.
+ When you look for races in this code remember that
+ LDT and other horrors are only used in user space. */
+ if (seg & (1<<2)) {
+ /* Must lock the LDT while reading it. */
+ mutex_lock(&current->mm->context.lock);
+ desc = current->mm->context.ldt;
+ desc = (void *)desc + (seg & ~7);
+ } else {
+ /* Must disable preemption while reading the GDT. */
+ desc = (u32 *)get_cpu_gdt_table(get_cpu());
+ desc = (void *)desc + (seg & ~7);
+ }
+
+ /* Decode the code segment base from the descriptor */
+ base = get_desc_base((struct desc_struct *)desc);
+
+ if (seg & (1<<2))
+ mutex_unlock(&current->mm->context.lock);
+ else
+ put_cpu();
+
+ /* Adjust EIP and segment limit, and clamp at the kernel limit.
+ It's legitimate for segments to wrap at 0xffffffff. */
+ seg_limit += base;
+ if (seg_limit < *eip_limit && seg_limit >= base)
+ *eip_limit = seg_limit;
+ return ip + base;
+}
+#endif
+
+/*
+ * X86_32
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * X86_64
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner
+ */
+static int is_prefetch(struct pt_regs *regs, unsigned long addr,
+ unsigned long error_code)
{
unsigned char *instr;
int scan_more = 1;
int prefetch = 0;
unsigned char *max_instr;
+#ifdef CONFIG_X86_32
+ unsigned long limit;
+ if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 >= 6)) {
+ /* Catch an obscure case of prefetch inside an NX page. */
+ if (nx_enabled && (error_code & PF_INSTR))
+ return 0;
+ } else {
+ return 0;
+ }
+ instr = (unsigned char *)get_segment_eip(regs, &limit);
+#else
/* If it was a exec fault ignore */
if (error_code & PF_INSTR)
return 0;
-
instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+#endif
+
max_instr = instr + 15;
+#ifdef CONFIG_X86_64
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
return 0;
+#endif
while (scan_more && instr < max_instr) {
unsigned char opcode;
unsigned char instr_hi;
unsigned char instr_lo;
+#ifdef CONFIG_X86_32
+ if (instr > (unsigned char *)limit)
+ break;
+#endif
if (probe_kernel_address(instr, opcode))
break;
@@ -125,12 +229,16 @@
scan_more = (instr_lo & 0xC) == 0x4;
break;
case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
scan_more = !instr_lo || (instr_lo>>1) == 1;
break;
case 0x00:
/* Prefetch instruction is 0x0F0D or 0x0F18 */
scan_more = 0;
+#ifdef CONFIG_X86_32
+ if (instr > (unsigned char *)limit)
+ break;
+#endif
if (probe_kernel_address(instr, opcode))
break;
prefetch = (instr_lo == 0xF) &&
@@ -185,6 +293,7 @@
printk("BAD\n");
}
+#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
@@ -218,6 +327,7 @@
}
return 0;
}
+#endif
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)