Merge tag 'v4.6-rc6' into x86/asm, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c7..2a4ee63 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@
"debugpat" boot parameter. With this parameter, various debug messages are
printed to dmesg log.
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT Call Sequence PAT State PAT MSR
+ =========================================================
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+
+ Legend
+ ------------------------------------------------
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b..1d12129 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -1,7 +1,6 @@
#ifndef _ASM_IA64_IOMMU_H
#define _ASM_IA64_IOMMU_H 1
-#define cpu_has_x2apic 0
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 064c7e2..5b7fa14 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1477,7 +1477,7 @@
}
aesni_ctr_enc_tfm = aesni_ctr_enc;
#ifdef CONFIG_AS_AVX
- if (cpu_has_avx) {
+ if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
pr_info("AES CTR mode by8 optimization enabled\n");
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index d844569..60907c1 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -562,7 +562,10 @@
{
const char *feature_name;
- if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 93d8f29..d96429d 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,7 +554,9 @@
{
const char *feature_name;
- if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 8baaff5..2d5c2e0b 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -129,7 +129,8 @@
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+ chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
return crypto_register_alg(&alg);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4264a3d..e32142b 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -179,11 +179,12 @@
static int __init poly1305_simd_mod_init(void)
{
- if (!cpu_has_xmm2)
+ if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+ poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
if (poly1305_use_avx2)
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 6d19834..870f6d8 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -538,7 +538,7 @@
{
const char *feature_name;
- if (!cpu_has_avx2 || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 8943407..644f97a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -600,7 +600,7 @@
static int __init serpent_sse2_init(void)
{
- if (!cpu_has_xmm2) {
+ if (!boot_cpu_has(X86_FEATURE_XMM2)) {
printk(KERN_INFO "SSE2 instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index dd14616..1024e37 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -166,7 +166,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 5f4d608..3ae0f43 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -201,7 +201,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 34e5083..0b17c83 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -151,7 +151,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d..ec138e5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@
long syscall_trace_enter(struct pt_regs *regs)
{
- u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
if (phase1_result == 0)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 858b555..6344629 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -781,19 +781,25 @@
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
-gs_change:
+.Lgs_change:
movl %edi, %gs
-2: mfence /* workaround */
+2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
popfq
ret
END(native_load_gs_index)
- _ASM_EXTABLE(gs_change, bad_gs)
+ _ASM_EXTABLE(.Lgs_change, bad_gs)
.section .fixup, "ax"
/* running with kernelgs */
bad_gs:
SWAPGS /* switch back to user gs */
+.macro ZAP_GS
+ /* This can't be a string because the preprocessor needs to see it. */
+ movl $__USER_DS, %eax
+ movl %eax, %gs
+.endm
+ ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
xorl %eax, %eax
movl %eax, %gs
jmp 2b
@@ -1019,13 +1025,13 @@
movl %ecx, %eax /* zero extend */
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
- cmpq $gs_change, RIP+8(%rsp)
+ cmpq $.Lgs_change, RIP+8(%rsp)
jne .Lerror_entry_done
/*
- * hack: gs_change can fail with user gsbase. If this happens, fix up
+ * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
* gsbase and proceed. We'll fix up the exception and land in
- * gs_change's error handler with kernel gsbase.
+ * .Lgs_change's error handler with kernel gsbase.
*/
jmp .Lerror_entry_from_usermode_swapgs
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 847f2f0..e1721da 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -72,24 +72,23 @@
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
pushq $__USER32_CS /* pt_regs->cs */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->ip = 0 (placeholder) */
+ pushq $0 /* pt_regs->ip = 0 (placeholder) */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
- pushq %r8 /* pt_regs->r12 = 0 */
- pushq %r8 /* pt_regs->r13 = 0 */
- pushq %r8 /* pt_regs->r14 = 0 */
- pushq %r8 /* pt_regs->r15 = 0 */
+ pushq $0 /* pt_regs->r12 = 0 */
+ pushq $0 /* pt_regs->r13 = 0 */
+ pushq $0 /* pt_regs->r14 = 0 */
+ pushq $0 /* pt_regs->r15 = 0 */
cld
/*
@@ -205,17 +204,16 @@
pushq %rdx /* pt_regs->dx */
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
- pushq %r8 /* pt_regs->r12 = 0 */
- pushq %r8 /* pt_regs->r13 = 0 */
- pushq %r8 /* pt_regs->r14 = 0 */
- pushq %r8 /* pt_regs->r15 = 0 */
+ pushq $0 /* pt_regs->r12 = 0 */
+ pushq $0 /* pt_regs->r13 = 0 */
+ pushq $0 /* pt_regs->r14 = 0 */
+ pushq $0 /* pt_regs->r15 = 0 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -316,11 +314,10 @@
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %r12 /* pt_regs->r12 */
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index b30dd81..4cddd17 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -384,5 +384,5 @@
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
377 i386 copy_file_range sys_copy_file_range
-378 i386 preadv2 sys_preadv2
-379 i386 pwritev2 sys_pwritev2
+378 i386 preadv2 sys_preadv2 compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index cac6d17..555263e 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,3 +374,5 @@
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat/ptregs
+534 x32 preadv2 compat_sys_preadv2
+535 x32 pwritev2 compat_sys_pwritev2
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 03c3eb7..2f02d23 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -13,7 +13,6 @@
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
@@ -28,16 +27,6 @@
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
- __attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
- return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
__attribute__((visibility("hidden")));
@@ -195,10 +184,6 @@
if (gtod->vclock_mode == VCLOCK_TSC)
cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
- else if (gtod->vclock_mode == VCLOCK_HPET)
- cycles = vread_hpet();
-#endif
#ifdef CONFIG_PARAVIRT_CLOCK
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode);
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 4158acc..a708aa9 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@
* segment.
*/
- vvar_start = . - 3 * PAGE_SIZE;
+ vvar_start = . - 2 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -35,8 +35,7 @@
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
- hpet_page = vvar_start + PAGE_SIZE;
- pvclock_page = vvar_start + 2 * PAGE_SIZE;
+ pvclock_page = vvar_start + PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f7045..b3cf813 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -18,7 +18,6 @@
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
-#include <asm/hpet.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
@@ -129,16 +128,6 @@
if (sym_offset == image->sym_vvar_page) {
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
- } else if (sym_offset == image->sym_hpet_page) {
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
- ret = vm_insert_pfn_prot(
- vma,
- (unsigned long)vmf->virtual_address,
- hpet_address >> PAGE_SHIFT,
- pgprot_noncached(PAGE_READONLY));
- }
-#endif
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_pvti_cpu0_va();
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 041e442..54c1745 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1518,7 +1518,7 @@
static void __init pmu_check_apic(void)
{
- if (cpu_has_apic)
+ if (boot_cpu_has(X86_FEATURE_APIC))
return;
x86_pmu.apic = 0;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c42..d5045c8 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -677,7 +677,7 @@
{
int err;
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
err = cstate_init();
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7012d18..3f6d8b5 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1383,7 +1383,7 @@
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
max_packages = topology_max_packages();
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0552884..2f29f4e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -357,7 +357,7 @@
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
/* Create the ucontext. */
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 99afb66..e77a644 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -1,11 +1,12 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <asm/asm.h>
-#include <asm/ptrace.h>
/*
* Alternative inline assembly for SMP.
@@ -233,36 +234,6 @@
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
-
-/*
- * Clear and restore the kernel write-protection flag on the local CPU.
- * Allows the kernel to edit read-only pages.
- * Side-effect: any interrupt handler running between save and restore will have
- * the ability to write to read-only pages.
- *
- * Warning:
- * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
- * no thread can be preempted in the instructions being modified (no iret to an
- * invalid instruction possible) or if the instructions are changed from a
- * consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
- */
-extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 98f25bb..bc27611 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -239,10 +239,10 @@
extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
{
- return cpu_has_x2apic && apic_is_x2apic_enabled();
+ return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled();
}
-#define x2apic_supported() (cpu_has_x2apic)
+#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC))
#else /* !CONFIG_X86_X2APIC */
static inline void check_x2apic(void) { }
static inline void x2apic_setup(void) { }
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index d194266..eae33c7 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,11 +3,10 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX 3
+#define VCLOCK_NONE 0 /* No vDSO clock available. */
+#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
+#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX 2
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e..5a3b2c1 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@
return (void __user *)round_down(sp - len, 16);
}
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
{
#ifdef CONFIG_X86_X32_ABI
if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@
static inline bool in_compat_syscall(void)
{
- return is_ia32_task() || is_x32_task();
+ return in_ia32_syscall() || in_x32_syscall();
}
#define in_compat_syscall in_compat_syscall /* override the generic impl */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3636ec0..07c942d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -118,31 +118,6 @@
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
-#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f9afef..7bfb6b7 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -294,6 +294,9 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
+
#ifdef CONFIG_X86_32
/*
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 15340e3..fea7724 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -176,7 +176,7 @@
regs->si = regs->di = regs->bp = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
- t->fs = t->gs = 0;
+ t->fsbase = t->gsbase = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;
}
@@ -226,8 +226,8 @@
(pr_reg)[18] = (regs)->flags; \
(pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
- (pr_reg)[21] = current->thread.fs; \
- (pr_reg)[22] = current->thread.gs; \
+ (pr_reg)[21] = current->thread.fsbase; \
+ (pr_reg)[22] = current->thread.gsbase; \
asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index e6a8613..3a10616 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,7 +4,7 @@
#include <asm/page.h>
#include <asm-generic/hugetlb.h>
-#define hugepages_supported() cpu_has_pse
+#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index d0afb05..f706041 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -5,7 +5,7 @@
static inline bool arch_irq_work_has_interrupt(void)
{
- return cpu_has_apic;
+ return boot_cpu_has(X86_FEATURE_APIC);
}
#endif /* _ASM_IRQ_WORK_H */
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 332f98c..22a8537 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -6,6 +6,8 @@
* Copyright (C) 2008 Wind River Systems, Inc.
*/
+#include <asm/ptrace.h>
+
/*
* BUFMAX defines the maximum number of characters in inbound/outbound
* buffers at least NUMREGBYTES*2 are needed for register packets
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 7a79ee2..7dc1d8f 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -84,7 +84,10 @@
{
DECLARE_ARGS(val, low, high);
- asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
@@ -98,7 +101,10 @@
asm volatile("2: rdmsr ; xor %[err],%[err]\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
+ "3: mov %[fault],%[err]\n\t"
+ "xorl %%eax, %%eax\n\t"
+ "xorl %%edx, %%edx\n\t"
+ "jmp 1b\n\t"
".previous\n\t"
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
@@ -108,10 +114,14 @@
return EAX_EDX_VAL(val, low, high);
}
-static inline void native_write_msr(unsigned int msr,
- unsigned low, unsigned high)
+/* Can be uninlined because referenced by paravirt */
+notrace static inline void native_write_msr(unsigned int msr,
+ unsigned low, unsigned high)
{
- asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index b94f6f6..dbff145 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,6 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
+#include <asm/pat.h>
/*
@@ -83,9 +84,12 @@
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
+static inline void mtrr_bp_init(void)
+{
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}
#define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 601f1b8..3c73141 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -130,21 +130,31 @@
#define get_kernel_rpl() (pv_info.kernel_rpl)
-static inline u64 paravirt_read_msr(unsigned msr, int *err)
+static inline u64 paravirt_read_msr(unsigned msr)
{
- return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
+ return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
}
-static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
+static inline void paravirt_write_msr(unsigned msr,
+ unsigned low, unsigned high)
{
- return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
+ return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
}
-/* These should all do BUG_ON(_err), but our headers are too tangled. */
+static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+{
+ return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+}
+
+static inline int paravirt_write_msr_safe(unsigned msr,
+ unsigned low, unsigned high)
+{
+ return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
+}
+
#define rdmsr(msr, val1, val2) \
do { \
- int _err; \
- u64 _l = paravirt_read_msr(msr, &_err); \
+ u64 _l = paravirt_read_msr(msr); \
val1 = (u32)_l; \
val2 = _l >> 32; \
} while (0)
@@ -156,8 +166,7 @@
#define rdmsrl(msr, val) \
do { \
- int _err; \
- val = paravirt_read_msr(msr, &_err); \
+ val = paravirt_read_msr(msr); \
} while (0)
static inline void wrmsrl(unsigned msr, u64 val)
@@ -165,23 +174,23 @@
wrmsr(msr, (u32)val, (u32)(val>>32));
}
-#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b)
+#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b)
/* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b) \
-({ \
- int _err; \
- u64 _l = paravirt_read_msr(msr, &_err); \
- (*a) = (u32)_l; \
- (*b) = _l >> 32; \
- _err; \
+#define rdmsr_safe(msr, a, b) \
+({ \
+ int _err; \
+ u64 _l = paravirt_read_msr_safe(msr, &_err); \
+ (*a) = (u32)_l; \
+ (*b) = _l >> 32; \
+ _err; \
})
static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
int err;
- *p = paravirt_read_msr(msr, &err);
+ *p = paravirt_read_msr_safe(msr, &err);
return err;
}
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index e8c2326..b4a23ea 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -155,10 +155,16 @@
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
- /* MSR, PMC and TSR operations.
- err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
- u64 (*read_msr)(unsigned int msr, int *err);
- int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+ /* Unsafe MSR operations. These will warn or panic on failure. */
+ u64 (*read_msr)(unsigned int msr);
+ void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+ /*
+ * Safe MSR operations.
+ * read sets err to 0 or -EIO. write returns 0 or -EIO.
+ */
+ u64 (*read_msr_safe)(unsigned int msr, int *err);
+ int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
u64 (*read_pmc)(int counter);
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index ca6c228..0b1ff4c 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,8 +5,8 @@
#include <asm/pgtable_types.h>
bool pat_enabled(void);
+void pat_disable(const char *reason);
extern void pat_init(void);
-void pat_init_cache_modes(u64);
extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 97f3242..f86491a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -183,7 +183,7 @@
static inline int has_transparent_hugepage(void)
{
- return cpu_has_pse;
+ return boot_cpu_has(X86_FEATURE_PSE);
}
#ifdef __HAVE_ARCH_PTE_DEVMAP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9264476..9251aa9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -388,9 +388,16 @@
unsigned long ip;
#endif
#ifdef CONFIG_X86_64
- unsigned long fs;
+ unsigned long fsbase;
+ unsigned long gsbase;
+#else
+ /*
+ * XXX: this could presumably be unsigned short. Alternatively,
+ * 32-bit kernels could be taught to use fsindex instead.
+ */
+ unsigned long fs;
+ unsigned long gs;
#endif
- unsigned long gs;
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 7d5a192..1549caa0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -2,6 +2,7 @@
#define _ASM_X86_SEGMENT_H
#include <linux/const.h>
+#include <asm/alternative.h>
/*
* Constructor for a conventional segment GDT (or LDT) entry.
@@ -207,13 +208,6 @@
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
#endif
#ifndef CONFIG_PARAVIRT
@@ -249,10 +243,13 @@
#endif
/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
+ * Load a segment. Fall back on loading the zero segment if something goes
+ * wrong. This variant assumes that loading zero fully clears the segment.
+ * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
+ * failure to fully clear the cached descriptor is only observable for
+ * FS and GS.
*/
-#define loadsegment(seg, value) \
+#define __loadsegment_simple(seg, value) \
do { \
unsigned short __val = (value); \
\
@@ -269,6 +266,38 @@
: "+r" (__val) : : "memory"); \
} while (0)
+#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
+#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
+#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On 32-bit systems, the hidden parts of FS and GS are unobservable if
+ * the selector is NULL, so there's no funny business here.
+ */
+#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
+#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+
+#else
+
+static inline void __loadsegment_fs(unsigned short value)
+{
+ asm volatile(" \n"
+ "1: movw %0, %%fs \n"
+ "2: \n"
+
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+
+ : : "rm" (value) : "memory");
+}
+
+/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */
+
+#endif
+
+#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+
/*
* Save a segment register away:
*/
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 11af24e..ac1d5da 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -6,6 +6,7 @@
#define COMMAND_LINE_SIZE 2048
#include <linux/linkage.h>
+#include <asm/page_types.h>
#ifdef __i386__
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
new file mode 100644
index 0000000..9039506
--- /dev/null
+++ b/arch/x86/include/asm/text-patching.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_X86_TEXT_PATCHING_H
+#define _ASM_X86_TEXT_PATCHING_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end);
+#else
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end)
+{}
+#define __parainstructions NULL
+#define __parainstructions_end NULL
+#endif
+
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * On the local CPU you need to be protected again NMI or MCE handlers seeing an
+ * inconsistent instruction while you patch.
+ */
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+
+#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84d..30c133a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@
return true;
}
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
{
#ifdef CONFIG_X86_32
return true;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 1fde8d5..4e5be94 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -181,7 +181,7 @@
static inline void __flush_tlb_all(void)
{
- if (cpu_has_pge)
+ if (static_cpu_has(X86_FEATURE_PGE))
__flush_tlb_global();
else
__flush_tlb();
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 174c421..7428697 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -22,7 +22,7 @@
static inline cycles_t get_cycles(void)
{
#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a969ae6..d794fd1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -110,7 +110,7 @@
extern int fixup_exception(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(unsigned long *ip);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
/*
* These are the main single-value transfer routines. They automatically
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index c54beb4..635eac5 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -550,7 +550,7 @@
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
- if (cpu_has_xmm) { \
+ if (boot_cpu_has(X86_FEATURE_XMM)) { \
xor_speed(&xor_block_pIII_sse); \
xor_speed(&xor_block_sse_pf64); \
} else if (boot_cpu_has(X86_FEATURE_MMX)) { \
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7c0a517..22a7b18 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -167,12 +167,12 @@
#define AVX_XOR_SPEED \
do { \
- if (cpu_has_avx && cpu_has_osxsave) \
+ if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
xor_speed(&xor_block_avx); \
} while (0)
#define AVX_SELECT(FASTEST) \
- (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
+ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
#else
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef..2522e56 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@
{
struct acpi_table_madt *madt = NULL;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -EINVAL;
madt = (struct acpi_table_madt *)table;
@@ -951,7 +951,7 @@
{
int count;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -979,7 +979,7 @@
int ret;
struct acpi_subtable_proc madt_proc[2];
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -1125,7 +1125,7 @@
if (acpi_disabled || acpi_noirq)
return -ENODEV;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f9093..5cb272a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
+#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d356987..60078a6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@
long tapic = apic_read(APIC_TMCCT);
unsigned long pm = acpi_pm_read_early();
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@
*delta = (long)res;
/* Correct the tsc counter value */
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
lapic_timer_frequency);
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@
{
unsigned long flags;
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1134,7 +1134,7 @@
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
- if (smp_found_config || !cpu_has_apic)
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
@@ -1227,7 +1227,7 @@
unsigned long long tsc = 0, ntsc;
long long max_loops = cpu_khz ? cpu_khz : 1000000;
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
if (disable_apic) {
@@ -1311,7 +1311,7 @@
break;
}
if (queued) {
- if (cpu_has_tsc && cpu_khz) {
+ if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
@@ -1445,7 +1445,7 @@
{
u64 msr;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return;
rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@
pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
x2apic_mode = 1;
x2apic_state = X2APIC_ON;
- } else if (!cpu_has_x2apic) {
+ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
x2apic_state = X2APIC_DISABLED;
}
}
@@ -1632,7 +1632,7 @@
*/
static int __init detect_init_APIC(void)
{
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
pr_info("No local APIC present\n");
return -1;
}
@@ -1711,14 +1711,14 @@
goto no_apic;
case X86_VENDOR_INTEL:
if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
break;
goto no_apic;
default:
goto no_apic;
}
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
/*
* Over-ride BIOS and try to enable the local APIC only if
* "lapic" specified.
@@ -2233,19 +2233,19 @@
return -1;
}
#ifdef CONFIG_X86_64
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
- if (!smp_found_config && !cpu_has_apic)
+ if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
return -1;
/*
* Complain if the BIOS pretends there is one.
*/
- if (!cpu_has_apic &&
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@
static int __init init_lapic_sysfs(void)
{
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
- if (cpu_has_apic)
+ if (boot_cpu_has(X86_FEATURE_APIC))
register_syscore_ops(&lapic_syscore_ops);
return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a0..13d19ed 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@
static u32 noop_apic_read(u32 reg)
{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
return 0;
}
static void noop_apic_write(u32 reg, u32 v)
{
- WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbf..84e33ff 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88..2a0f225 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@
{
int apicid, cpuid;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef49551..a5e400a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@
print_PIC();
/* don't print out if apic is not there */
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return 0;
print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7b76eb6..c343a54 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@
* can safely set X86_FEATURE_EXTD_APICID unconditionally for families
* after 16h.
*/
- if (cpu_has_apic && c->x86 > 0x16) {
- set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
- } else if (cpu_has_apic && c->x86 >= 0xf) {
- /* check CPU config space for extended APIC ID */
- unsigned int val;
- val = read_pci_config(0, 24, 0, 0x68);
- if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
+ if (c->x86 > 0x16)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ else if (c->x86 >= 0xf) {
+ /* check CPU config space for extended APIC ID */
+ unsigned int val;
+
+ val = read_pci_config(0, 24, 0, 0x68);
+ if ((val >> 17 & 0x3) == 0x3)
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ }
}
#endif
@@ -628,6 +631,7 @@
*/
msr_set_bit(MSR_K7_HWCR, 6);
#endif
+ set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
}
static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
- if (cpu_has_xmm2) {
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
/* MFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d..0881061 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -430,7 +430,7 @@
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
- loadsegment(gs, 0);
+ __loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
@@ -862,30 +862,34 @@
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+}
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
/*
- * ESPFIX is a strange bug. All real CPUs have it. Paravirt
- * systems that run Linux at CPL > 0 may or may not have the
- * issue, but, even if they have the issue, there's absolutely
- * nothing we can do about it because we can't use the real IRET
- * instruction.
+ * Empirically, writing zero to a segment selector on AMD does
+ * not clear the base, whereas writing zero to a segment
+ * selector on Intel does clear the base. Intel's behavior
+ * allows slightly faster context switches in the common case
+ * where GS is unused by the prev and next threads.
*
- * NB: For the time being, only 32-bit kernels support
- * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
- * whether to apply espfix using paravirt hooks. If any
- * non-paravirt system ever shows up that does *not* have the
- * ESPFIX issue, we can change this.
+ * Since neither vendor documents this anywhere that I can see,
+ * detect it directly instead of hardcoding the choice by
+ * vendor.
+ *
+ * I've designated AMD's behavior as the "bug" because it's
+ * counterintuitive and less friendly.
*/
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
- do {
- extern void native_iret(void);
- if (pv_cpu_ops.iret == native_iret)
- set_cpu_bug(c, X86_BUG_ESPFIX);
- } while (0);
-#else
- set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+ unsigned long old_base, tmp;
+ rdmsrl(MSR_FS_BASE, old_base);
+ wrmsrl(MSR_FS_BASE, 1);
+ loadsegment(fs, 0);
+ rdmsrl(MSR_FS_BASE, tmp);
+ if (tmp != 0)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ wrmsrl(MSR_FS_BASE, old_base);
#endif
}
@@ -921,6 +925,33 @@
get_model_name(c); /* Default name */
detect_nopl(c);
+
+ detect_null_seg_behavior(c);
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+# else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1076,12 +1107,12 @@
struct tss_struct *tss;
int cpu;
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ return;
+
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
- if (!boot_cpu_has(X86_FEATURE_SEP))
- goto out;
-
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
@@ -1096,7 +1127,6 @@
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
-out:
put_cpu();
}
#endif
@@ -1528,7 +1558,7 @@
pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
- cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9c..bd9dcd6 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+ p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
break;
case 0xe: /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1f7fdb9..1d55822 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -152,9 +152,9 @@
* the TLB when any changes are made to any of the page table entries.
* The operating system must reload CR3 to cause the TLB to be flushed"
*
- * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
- * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
- * to be modified
+ * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+ * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+ * to be modified.
*/
if (c->x86 == 5 && c->x86_model == 9) {
pr_info("Disabling PGE capability bit\n");
@@ -281,7 +281,7 @@
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
- if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -456,7 +456,7 @@
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- if (cpu_has_xmm2)
+ if (cpu_has(c, X86_FEATURE_XMM2))
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && cpu_has_clflush &&
+ if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c..1defb8e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+ if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780ca..6b9dc4d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba..b1086f7 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -137,7 +137,7 @@
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -170,7 +170,7 @@
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f5736..16e37a25 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+}
+
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
- unsigned long flags;
unsigned lo, dummy;
unsigned int i;
@@ -481,15 +494,6 @@
mtrr_state_set = 1;
- /* PAT setup for BP. We need to go through sync steps here */
- local_irq_save(flags);
- prepare_set();
-
- pat_init();
-
- post_set();
- local_irq_restore(flags);
-
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
@@ -741,7 +745,7 @@
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -771,7 +775,7 @@
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d47..7d393ec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
+ if (mtrr_enabled())
+ mtrr_bp_pat_init();
+
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@
}
}
- if (!mtrr_enabled())
+ if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
+
+ /*
+ * PAT initialization relies on MTRR's rendezvous handler.
+ * Skip PAT init until the handler can initialize both
+ * features independently.
+ */
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ }
}
void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884d..6c7ced0 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 364e583..8cac429 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -94,7 +94,7 @@
*/
static uint32_t __init vmware_platform(void)
{
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
unsigned int hyper_vendor_id[3];
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 1f4acd6..3fe45f8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -151,7 +151,7 @@
return;
/* Did the boot loader setup the local APIC ? */
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
if (apic_force_enable(r.start))
return;
}
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index dd9ca9b6..aad34aa 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -21,11 +21,15 @@
* We should really only care about bugs here
* anyway. Not features.
*/
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
{
u32 cr0_saved;
s32 fdiv_bug;
+ /* kernel_fpu_begin/end() relies on patched alternative instructions. */
+ if (!boot_cpu_has(X86_FEATURE_FPU))
+ return;
+
/* We might have CR0::TS set already, clear it: */
cr0_saved = read_cr0();
write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@
pr_warn("Hmm, FPU with FDIV bug\n");
}
}
-
-void __init fpu__init_check_bugs(void)
-{
- /*
- * kernel_fpu_begin/end() in check_fpu() relies on the patched
- * alternative instructions.
- */
- if (cpu_has_fpu)
- check_fpu();
-}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8e37cc8..9702754 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -217,14 +217,14 @@
void fpstate_init(union fpregs_state *state)
{
- if (!cpu_has_fpu) {
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
fpstate_init_soft(&state->soft);
return;
}
memset(state, 0, xstate_size);
- if (cpu_has_fxsr)
+ if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else
fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != ¤t->thread.fpu);
@@ -506,33 +506,6 @@
* x87 math exception handling:
*/
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.cwd;
- } else {
- return (unsigned short)fpu->state.fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.swd;
- } else {
- return (unsigned short)fpu->state.fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
- if (cpu_has_xmm) {
- return fpu->state.fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
int err;
@@ -547,10 +520,15 @@
* so if this combination doesn't produce any single exception,
* then we have a bad program that isn't synchronizing its FPU usage
* and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
+ * fully reproduce the context of the exception.
*/
- cwd = get_fpu_cwd(fpu);
- swd = get_fpu_swd(fpu);
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
+ cwd = fpu->state.fxsave.cwd;
+ swd = fpu->state.fxsave.swd;
+ } else {
+ cwd = (unsigned short)fpu->state.fsave.cwd;
+ swd = (unsigned short)fpu->state.fsave.swd;
+ }
err = swd & ~cwd;
} else {
@@ -560,7 +538,11 @@
* unmasked exception was caught we must mask the exception mask bits
* at 0x1f80, and then use these to mask the exception bits at 0x3f.
*/
- unsigned short mxcsr = get_fpu_mxcsr(fpu);
+ unsigned short mxcsr = MXCSR_DEFAULT;
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ mxcsr = fpu->state.fxsave.mxcsr;
+
err = ~(mxcsr >> 7) & mxcsr;
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 54c86ff..aacfd7a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -29,22 +29,22 @@
unsigned long cr0;
unsigned long cr4_mask = 0;
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
cr4_mask |= X86_CR4_OSFXSR;
- if (cpu_has_xmm)
+ if (boot_cpu_has(X86_FEATURE_XMM))
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
cr4_set_bits(cr4_mask);
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
cr0 |= X86_CR0_EM;
write_cr0(cr0);
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
fpstate_init_soft(¤t->thread.fpu.state.soft);
else
#endif
@@ -89,7 +89,7 @@
}
#ifndef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
for (;;)
asm volatile("hlt");
@@ -106,7 +106,7 @@
{
unsigned int mask = 0;
- if (cpu_has_fxsr) {
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
/* Static because GCC does not get 16-byte stack alignment right: */
static struct fxregs_state fxregs __initdata;
@@ -212,7 +212,7 @@
* fpu__init_system_xstate().
*/
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
@@ -221,7 +221,7 @@
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
xstate_size = sizeof(struct swregs_state);
} else {
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
xstate_size = sizeof(struct fxregs_state);
else
xstate_size = sizeof(struct fregs_state);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 8bd1c00..81422df 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -21,7 +21,10 @@
{
struct fpu *target_fpu = &target->thread.fpu;
- return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ return regset->n;
+ else
+ return 0;
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@
{
struct fpu *fpu = &target->thread.fpu;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@
struct fpu *fpu = &target->thread.fpu;
int ret;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@
* update the header bits in the xsave header, indicating the
* presence of FP and SSE state.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return ret;
@@ -79,7 +82,7 @@
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@
fpu__activate_fpstate_read(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -306,10 +309,10 @@
fpu__activate_fpstate_write(fpu);
fpstate_sanitize_xstate(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -325,7 +328,7 @@
* update the header bit in the xsave header, indicating the
* presence of FP.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return ret;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b48ef35..4ea2a59 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -190,7 +190,7 @@
*/
void fpu__init_cpu_xstate(void)
{
- if (!cpu_has_xsave || !xfeatures_mask)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
return;
cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (xfeature_enabled(i)) {
xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
setup_xstate_features();
print_xstate_features();
- if (cpu_has_xsaves) {
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
init_fpstate.xsave.header.xfeatures = xfeatures_mask;
}
@@ -417,7 +417,7 @@
*/
static int using_compacted_format(void)
{
- return cpu_has_xsaves;
+ return boot_cpu_has(X86_FEATURE_XSAVES);
}
static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@
unsigned int eax, ebx, ecx, edx;
unsigned int calculated_xstate_size;
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
/*
* - CPUID function 0DH, sub-function 0:
* EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
pr_info("x86/fpu: Legacy x87 FPU detected.\n");
return;
}
@@ -667,7 +667,7 @@
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
xfeatures_mask,
xstate_size,
- cpu_has_xsaves ? "compacted" : "standard");
+ boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
}
/*
@@ -678,7 +678,7 @@
/*
* Restore XCR0 on xsave capable CPUs:
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af11129..6f8902b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,62 +555,53 @@
*/
cld
- cmpl $2,(%esp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,%ss:early_recursion_flag
- je hlt_loop
incl %ss:early_recursion_flag
- push %eax # 16(%esp)
- push %ecx # 12(%esp)
- push %edx # 8(%esp)
- push %ds # 4(%esp)
- push %es # 0(%esp)
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
+ /* The vector number is in pt_regs->gs */
- cmpl $(__KERNEL_CS),32(%esp)
- jne 10f
+ cld
+ pushl %fs /* pt_regs->fs */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %es /* pt_regs->es */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %ds /* pt_regs->ds */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %eax /* pt_regs->ax */
+ pushl %ebp /* pt_regs->bp */
+ pushl %edi /* pt_regs->di */
+ pushl %esi /* pt_regs->si */
+ pushl %edx /* pt_regs->dx */
+ pushl %ecx /* pt_regs->cx */
+ pushl %ebx /* pt_regs->bx */
- leal 28(%esp),%eax # Pointer to %eip
- call early_fixup_exception
- andl %eax,%eax
- jnz ex_entry /* found an exception entry */
+ /* Fix up DS and ES */
+ movl $(__KERNEL_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
-10:
-#ifdef CONFIG_PRINTK
- xorl %eax,%eax
- movw %ax,2(%esp) /* clean up the segment values on some cpus */
- movw %ax,6(%esp)
- movw %ax,34(%esp)
- leal 40(%esp),%eax
- pushl %eax /* %esp before the exception */
- pushl %ebx
- pushl %ebp
- pushl %esi
- pushl %edi
- movl %cr2,%eax
- pushl %eax
- pushl (20+6*4)(%esp) /* trapno */
- pushl $fault_msg
- call printk
-#endif
- call dump_stack
-hlt_loop:
- hlt
- jmp hlt_loop
+ /* Load the vector number into EDX */
+ movl PT_GS(%esp), %edx
-ex_entry:
- pop %es
- pop %ds
- pop %edx
- pop %ecx
- pop %eax
- decl %ss:early_recursion_flag
-.Lis_nmi:
- addl $8,%esp /* drop vector number and error code */
+ /* Load GS into pt_regs->gs and clear high bits */
+ movw %gs, PT_GS(%esp)
+ movw $0, PT_GS+2(%esp)
+
+ movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */
+ call early_fixup_exception
+
+ popl %ebx /* pt_regs->bx */
+ popl %ecx /* pt_regs->cx */
+ popl %edx /* pt_regs->dx */
+ popl %esi /* pt_regs->si */
+ popl %edi /* pt_regs->di */
+ popl %ebp /* pt_regs->bp */
+ popl %eax /* pt_regs->ax */
+ popl %ds /* pt_regs->ds */
+ popl %es /* pt_regs->es */
+ popl %fs /* pt_regs->fs */
+ popl %gs /* pt_regs->gs */
+ decl %ss:early_recursion_flag
+ addl $4, %esp /* pop pt_regs->orig_ax */
iret
ENDPROC(early_idt_handler_common)
@@ -647,10 +638,14 @@
popl %eax
#endif
iret
+
+hlt_loop:
+ hlt
+ jmp hlt_loop
ENDPROC(ignore_int)
__INITDATA
.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
__REFDATA
@@ -715,19 +710,6 @@
int_msg:
.asciz "Unknown interrupt or fault at: %p %p %p\n"
-fault_msg:
-/* fault info: */
- .ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
- .ascii " EDI %p ESI %p EBP %p EBX %p\n"
- .ascii " ESP %p ES %p DS %p\n"
- .ascii " EDX %p ECX %p EAX %p\n"
-/* fault frame: */
- .ascii " vec %p err %p EIP %p CS %p flg %p\n"
- .ascii "Stack: %p %p %p %p %p %p %p %p\n"
- .ascii " %p %p %p %p %p %p %p %p\n"
- .asciz " %p %p %p %p %p %p %p %p\n"
-
#include "../../x86/xen/xen-head.S"
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22fbf9d..5df831e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,6 +20,7 @@
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include "../entry/calling.h"
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@
* tables and then reload them.
*/
+ /*
+ * Setup stack for verify_cpu(). "-8" because stack_start is defined
+ * this way, see below. Our best guess is a NULL ptr for stack
+ * termination heuristics and we don't want to break anything which
+ * might depend on it (kgdb, ...).
+ */
+ leaq (__end_init_task - 8)(%rip), %rsp
+
/* Sanitize CPU configuration */
call verify_cpu
@@ -350,90 +359,48 @@
*/
cld
- cmpl $2,(%rsp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
incl early_recursion_flag(%rip)
- pushq %rax # 64(%rsp)
- pushq %rcx # 56(%rsp)
- pushq %rdx # 48(%rsp)
- pushq %rsi # 40(%rsp)
- pushq %rdi # 32(%rsp)
- pushq %r8 # 24(%rsp)
- pushq %r9 # 16(%rsp)
- pushq %r10 # 8(%rsp)
- pushq %r11 # 0(%rsp)
+ /* The vector number is currently in the pt_regs->di slot. */
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* RSI = vector number */
+ movq %rdi, 8(%rsp) /* pt_regs->di = RDI */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq %rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+ pushq %rbx /* pt_regs->bx */
+ pushq %rbp /* pt_regs->bp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
- cmpl $__KERNEL_CS,96(%rsp)
- jne 11f
-
- cmpl $14,72(%rsp) # Page fault?
+ cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
+ GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
call early_make_pgtable
andl %eax,%eax
- jz 20f # All good
+ jz 20f /* All good */
10:
- leaq 88(%rsp),%rdi # Pointer to %rip
+ movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
call early_fixup_exception
- andl %eax,%eax
- jnz 20f # Found an exception entry
-11:
-#ifdef CONFIG_EARLY_PRINTK
- GET_CR2_INTO(%r9) # can clobber any volatile register if pv
- movl 80(%rsp),%r8d # error code
- movl 72(%rsp),%esi # vector number
- movl 96(%rsp),%edx # %cs
- movq 88(%rsp),%rcx # %rip
- xorl %eax,%eax
- leaq early_idt_msg(%rip),%rdi
- call early_printk
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
- call dump_stack
-#ifdef CONFIG_KALLSYMS
- leaq early_idt_ripmsg(%rip),%rdi
- movq 40(%rsp),%rsi # %rip again
- call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1: hlt
- jmp 1b
-
-20: # Exception table entry found or page table generated
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rdi
- popq %rsi
- popq %rdx
- popq %rcx
- popq %rax
+20:
decl early_recursion_flag(%rip)
-.Lis_nmi:
- addq $16,%rsp # drop vector number and error code
- INTERRUPT_RETURN
+ jmp restore_regs_and_iret
ENDPROC(early_idt_handler_common)
__INITDATA
.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
- .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
- .asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
GLOBAL(name)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a..7282c2e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -773,7 +773,6 @@
.mask = HPET_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter,
- .archdata = { .vclock_mode = VCLOCK_HPET },
};
static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e565e0e..fc25f69 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -13,6 +13,7 @@
#include <linux/cpu.h>
#include <asm/kprobes.h>
#include <asm/alternative.h>
+#include <asm/text-patching.h>
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 2da6ee9..04cde52 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -45,6 +45,7 @@
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <asm/text-patching.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ae703ac..38cf7a7 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -51,6 +51,7 @@
#include <linux/ftrace.h>
#include <linux/frame.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7b3b9d1..4425f59 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -29,6 +29,7 @@
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8079508..dc1207e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -522,7 +522,7 @@
if (boot_cpu_data.cpuid_level < 0)
return 0; /* So we don't blow up on old processors */
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
return 0;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 005c03e..477ae80 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -31,6 +31,7 @@
#include <linux/jump_label.h>
#include <linux/random.h>
+#include <asm/text-patching.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f08ac28..f958391 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -339,8 +339,10 @@
.write_cr8 = native_write_cr8,
#endif
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr_safe,
- .write_msr = native_write_msr_safe,
+ .read_msr = native_read_msr,
+ .write_msr = native_write_msr,
+ .read_msr_safe = native_read_msr_safe,
+ .write_msr_safe = native_write_msr_safe,
.read_pmc = native_read_pmc,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31..4285f6a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@
}
}
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct desc_struct *desc = t->thread.tls_array;
- desc += tls;
- fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- return get_desc_base(&t->thread.tls_array[tls]);
-}
-
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -169,9 +150,9 @@
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
- p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+ p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
savesegment(fs, p->thread.fsindex);
- p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+ p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
- if (is_ia32_task())
+ if (in_ia32_syscall())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)tls, 0);
else
@@ -282,7 +263,7 @@
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned fsindex, gsindex;
+ unsigned prev_fsindex, prev_gsindex;
fpu_switch_t fpu_switch;
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, fsindex);
- savesegment(gs, gsindex);
+ savesegment(fs, prev_fsindex);
+ savesegment(gs, prev_gsindex);
/*
* Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@
* Switch FS and GS.
*
* These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. Those bases
- * only differ from the values in the GDT or LDT if the selector
- * is 0.
+ * 64-bit bases are that controlled by arch_prctl. The bases
+ * don't necessarily match the selectors, as user code can do
+ * any number of things to cause them to be inconsistent.
*
- * Loading the segment register resets the hidden base part of
- * the register to 0 or the value from the GDT / LDT. If the
- * next base address zero, writing 0 to the segment register is
- * much faster than using wrmsr to explicitly zero the base.
+ * We don't promise to preserve the bases if the selectors are
+ * nonzero. We also don't promise to preserve the base if the
+ * selector is zero and the base doesn't match whatever was
+ * most recently passed to ARCH_SET_FS/GS. (If/when the
+ * FSGSBASE instructions are enabled, we'll need to offer
+ * stronger guarantees.)
*
- * The thread_struct.fs and thread_struct.gs values are 0
- * if the fs and gs bases respectively are not overridden
- * from the values implied by fsindex and gsindex. They
- * are nonzero, and store the nonzero base addresses, if
- * the bases are overridden.
- *
- * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
- * be impossible.
- *
- * Therefore we need to reload the segment registers if either
- * the old or new selector is nonzero, and we need to override
- * the base address if next thread expects it to be overridden.
- *
- * This code is unnecessarily slow in the case where the old and
- * new indexes are zero and the new base is nonzero -- it will
- * unnecessarily write 0 to the selector before writing the new
- * base address.
- *
- * Note: This all depends on arch_prctl being the only way that
- * user code can override the segment base. Once wrfsbase and
- * wrgsbase are enabled, most of this code will need to change.
+ * As an invariant,
+ * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+ * impossible.
*/
- if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ if (next->fsindex) {
+ /* Loading a nonzero value into FS sets the index and base. */
loadsegment(fs, next->fsindex);
-
- /*
- * If user code wrote a nonzero value to FS, then it also
- * cleared the overridden base address.
- *
- * XXX: if user code wrote 0 to FS and cleared the base
- * address itself, we won't notice and we'll incorrectly
- * restore the prior base address next time we reschdule
- * the process.
- */
- if (fsindex)
- prev->fs = 0;
+ } else {
+ if (next->fsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_fsindex)
+ loadsegment(fs, 0);
+ wrmsrl(MSR_FS_BASE, next->fsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ */
+ loadsegment(fs, __USER_DS);
+ loadsegment(fs, 0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_FS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->fsbase || prev_fsindex)
+ loadsegment(fs, 0);
+ }
+ }
}
- if (next->fs)
- wrmsrl(MSR_FS_BASE, next->fs);
- prev->fsindex = fsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_fsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_fsindex)
+ prev->fsbase = 0;
+ prev->fsindex = prev_fsindex;
- if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ if (next->gsindex) {
+ /* Loading a nonzero value into GS sets the index and base. */
load_gs_index(next->gsindex);
-
- /* This works (and fails) the same way as fsindex above. */
- if (gsindex)
- prev->gs = 0;
+ } else {
+ if (next->gsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_gsindex)
+ load_gs_index(0);
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ *
+ * This contains a pointless SWAPGS pair.
+ * Fixing it would involve an explicit check
+ * for Xen or a new pvop.
+ */
+ load_gs_index(__USER_DS);
+ load_gs_index(0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_GS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->gsbase || prev_gsindex)
+ load_gs_index(0);
+ }
+ }
}
- if (next->gs)
- wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
- prev->gsindex = gsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_gsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_gsindex)
+ prev->gsbase = 0;
+ prev->gsindex = prev_gsindex;
switch_fpu_finish(next_fpu, fpu_switch);
@@ -516,25 +535,12 @@
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
- }
+ task->thread.gsindex = 0;
+ task->thread.gsbase = addr;
+ if (doit) {
+ load_gs_index(0);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
}
- put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
@@ -542,52 +548,30 @@
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- loadsegment(fs, FS_TLS_SEL);
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
- }
+ task->thread.fsindex = 0;
+ task->thread.fsbase = addr;
+ if (doit) {
+ /* set the selector to 0 to not confuse __switch_to */
+ loadsegment(fs, 0);
+ ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
- if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
- else if (doit)
+ if (doit)
rdmsrl(MSR_FS_BASE, base);
else
- base = task->thread.fs;
+ base = task->thread.fsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
case ARCH_GET_GS: {
unsigned long base;
- unsigned gsindex;
- if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
- savesegment(gs, gsindex);
- if (gsindex)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
- base = task->thread.gs;
- } else
- base = task->thread.gs;
+ if (doit)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9c..e60ef91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@
switch (offset) {
case offsetof(struct user_regs_struct,fs):
- /*
- * If this is setting fs as for normal 64-bit use but
- * setting fs_base has implicitly changed it, leave it.
- */
- if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
- task->thread.fs != 0) ||
- (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
- task->thread.fs == 0))
- break;
task->thread.fsindex = value;
if (task == current)
loadsegment(fs, task->thread.fsindex);
break;
case offsetof(struct user_regs_struct,gs):
- /*
- * If this is setting gs as for normal 64-bit use but
- * setting gs_base has implicitly changed it, leave it.
- */
- if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
- task->thread.gs != 0) ||
- (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
- task->thread.gs == 0))
- break;
task->thread.gsindex = value;
if (task == current)
load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
- if (child->thread.fs != value)
+ if (child->thread.fsbase != value)
return do_arch_prctl(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@
*/
if (value >= TASK_SIZE_OF(child))
return -EIO;
- if (child->thread.gs != value)
+ if (child->thread.gsbase != value)
return do_arch_prctl(child, ARCH_SET_GS, value);
return 0;
#endif
@@ -453,31 +435,17 @@
#ifdef CONFIG_X86_64
case offsetof(struct user_regs_struct, fs_base): {
/*
- * do_arch_prctl may have used a GDT slot instead of
- * the MSR. To userland, it appears the same either
- * way, except the %fs segment selector might not be 0.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.fsindex;
- if (task->thread.fs != 0)
- return task->thread.fs;
- if (task == current)
- asm("movl %%fs,%0" : "=r" (seg));
- if (seg != FS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[FS_TLS]);
+ return task->thread.fsbase;
}
case offsetof(struct user_regs_struct, gs_base): {
/*
- * Exactly the same here as the %fs handling above.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.gsindex;
- if (task->thread.gs != 0)
- return task->thread.gs;
- if (task == current)
- asm("movl %%gs,%0" : "=r" (seg));
- if (seg != GS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[GS_TLS]);
+ return task->thread.gsbase;
}
#endif
}
@@ -1266,7 +1234,7 @@
compat_ulong_t caddr, compat_ulong_t cdata)
{
#ifdef CONFIG_X86_X32_ABI
- if (!is_ia32_task())
+ if (!in_ia32_syscall())
return x32_arch_ptrace(child, request, caddr, cdata);
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7..2ebcc60 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -391,7 +391,7 @@
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
@@ -442,7 +442,7 @@
{
unsigned long flags;
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
else
flags = UC_SIGCONTEXT_SS;
@@ -762,7 +762,7 @@
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
- if (is_ia32_task())
+ if (in_ia32_syscall())
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a2065d3..1fe4130 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1231,7 +1231,7 @@
* If we couldn't find a local APIC, then get out of here now!
*/
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !cpu_has_apic) {
+ !boot_cpu_has(X86_FEATURE_APIC)) {
if (!disable_apic) {
pr_err("BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index ab40954..f386bad 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -40,7 +40,7 @@
static inline void flush_tce(void* tceaddr)
{
/* a single tce can't cross a cache line */
- if (cpu_has_clflush)
+ if (boot_cpu_has(X86_FEATURE_CLFLUSH))
clflush(tceaddr);
else
wbinvd();
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 7fc5e84..9692a5e 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -114,6 +114,7 @@
int can_allocate)
{
struct user_desc info;
+ unsigned short __maybe_unused sel, modified_sel;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -141,6 +142,47 @@
set_tls_desc(p, idx, &info, 1);
+ /*
+ * If DS, ES, FS, or GS points to the modified segment, forcibly
+ * refresh it. Only needed on x86_64 because x86_32 reloads them
+ * on return to user mode.
+ */
+ modified_sel = (idx << 3) | 3;
+
+ if (p == current) {
+#ifdef CONFIG_X86_64
+ savesegment(ds, sel);
+ if (sel == modified_sel)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if (sel == modified_sel)
+ loadsegment(es, sel);
+
+ savesegment(fs, sel);
+ if (sel == modified_sel)
+ loadsegment(fs, sel);
+
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ loadsegment(gs, sel);
+#endif
+ } else {
+#ifdef CONFIG_X86_64
+ if (p->thread.fsindex == modified_sel)
+ p->thread.fsbase = info.base_addr;
+
+ if (p->thread.gsindex == modified_sel)
+ p->thread.gsbase = info.base_addr;
+#endif
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 06cbe25..d159048 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
+#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c9c4c7c..38ba6de 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -36,7 +36,7 @@
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
- erroneous rdtsc usage on !cpu_has_tsc processors */
+ erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
static int __read_mostly tsc_disabled = -1;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@
#ifndef CONFIG_SMP
unsigned long cpu_khz_old = cpu_khz;
- if (cpu_has_tsc) {
- tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
- cpu_data(0).loops_per_jiffy =
- cpufreq_scale(cpu_data(0).loops_per_jiffy,
- cpu_khz_old, cpu_khz);
- return 0;
- } else
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;
+
+ tsc_khz = x86_platform.calibrate_tsc();
+ cpu_khz = tsc_khz;
+ cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+
+ return 0;
#else
return -ENODEV;
#endif
@@ -922,9 +922,6 @@
struct cpufreq_freqs *freq = data;
unsigned long *lpj;
- if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@
.notifier_call = time_cpufreq_notifier
};
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
{
- if (!cpu_has_tsc)
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
@@ -965,7 +962,7 @@
return 0;
}
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
#endif /* CONFIG_CPU_FREQ */
@@ -1081,7 +1078,7 @@
*/
int unsynchronized_tsc(void)
{
- if (!cpu_has_tsc || tsc_unstable)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
return 1;
#ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@
static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;
if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@
u64 lpj;
int cpu;
- if (!cpu_has_tsc) {
+ if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6e..98b4dc8 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@
static inline int sizeof_long(void)
{
- return is_ia32_task() ? 4 : 8;
+ return in_ia32_syscall() ? 4 : 8;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbbaa802..769af90 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -75,7 +75,7 @@
return 0;
/* Update OSXSAVE bit */
- if (cpu_has_xsave && best->function == 0x1) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
best->ecx &= ~F(OSXSAVE);
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
best->ecx |= F(OSXSAVE);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1ff4dbb..4c6972f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3844,7 +3844,8 @@
__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
context->shadow_root_level, false,
- cpu_has_gbpages, true, true);
+ boot_cpu_has(X86_FEATURE_GBPAGES),
+ true, true);
else
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31346a3..fafd720 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1254,7 +1254,7 @@
kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
loadsegment(fs, svm->host.fs);
- wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2f1ea2f..b72743c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -809,8 +809,7 @@
#define host_clocks \
{VCLOCK_NONE, "none"}, \
- {VCLOCK_TSC, "tsc"}, \
- {VCLOCK_HPET, "hpet"} \
+ {VCLOCK_TSC, "tsc"} \
TRACE_EVENT(kvm_update_master_clock,
TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 133679d..cb47fe3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3390,7 +3390,7 @@
}
}
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c..12f33e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2611,7 +2611,7 @@
r = KVM_MAX_MCE_BANKS;
break;
case KVM_CAP_XCRS:
- r = cpu_has_xsave;
+ r = boot_cpu_has(X86_FEATURE_XSAVE);
break;
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
@@ -3094,7 +3094,7 @@
/* Set XSTATE_BV and possibly XCOMP_BV. */
xsave->header.xfeatures = xstate_bv;
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
@@ -3121,7 +3121,7 @@
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (cpu_has_xsave) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
@@ -3139,7 +3139,7 @@
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- if (cpu_has_xsave) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
* Here we allow setting states that are not present in
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
@@ -3160,7 +3160,7 @@
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
struct kvm_xcrs *guest_xcrs)
{
- if (!cpu_has_xsave) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
guest_xcrs->nr_xcrs = 0;
return;
}
@@ -3176,7 +3176,7 @@
{
int i, r = 0;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -EINVAL;
if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5865,7 +5865,7 @@
perf_register_guest_info_callbacks(&kvm_guest_cbs);
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
@@ -7293,7 +7293,7 @@
static void fx_init(struct kvm_vcpu *vcpu)
{
fpstate_init(&vcpu->arch.guest_fpu.state);
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 91d93b9..b559d92 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -612,7 +612,7 @@
{
stac();
#ifdef CONFIG_X86_INTEL_USERCOPY
- if (n > 64 && cpu_has_xmm2)
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_zeroing_intel_nocache(to, from, n);
else
__copy_user_zeroing(to, from, n);
@@ -629,7 +629,7 @@
{
stac();
#ifdef CONFIG_X86_INTEL_USERCOPY
- if (n > 64 && cpu_has_xmm2)
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
else
__copy_user(to, from, n);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 82447b3..4bb53b8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,6 @@
#include <linux/module.h>
#include <asm/uaccess.h>
+#include <asm/traps.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *, int);
@@ -42,6 +43,43 @@
}
EXPORT_SYMBOL(ex_handler_ext);
+bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
+ (unsigned int)regs->cx);
+
+ /* Pretend that the read succeeded and returned 0. */
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = 0;
+ regs->dx = 0;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+
+bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
+ (unsigned int)regs->cx,
+ (unsigned int)regs->dx, (unsigned int)regs->ax);
+
+ /* Pretend that the write succeeded. */
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
+bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ if (static_cpu_has(X86_BUG_NULL_SEG))
+ asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
+ asm volatile ("mov %0, %%fs" : : "rm" (0));
+ return ex_handler_default(fixup, regs, trapnr);
+}
+EXPORT_SYMBOL(ex_handler_clear_fs);
+
bool ex_has_fault_handler(unsigned long ip)
{
const struct exception_table_entry *e;
@@ -82,24 +120,46 @@
return handler(e, regs, trapnr);
}
+extern unsigned int early_recursion_flag;
+
/* Restricted version used during very early boot */
-int __init early_fixup_exception(unsigned long *ip)
+void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
- const struct exception_table_entry *e;
- unsigned long new_ip;
- ex_handler_t handler;
+ /* Ignore early NMIs. */
+ if (trapnr == X86_TRAP_NMI)
+ return;
- e = search_exception_tables(*ip);
- if (!e)
- return 0;
+ if (early_recursion_flag > 2)
+ goto halt_loop;
- new_ip = ex_fixup_addr(e);
- handler = ex_fixup_handler(e);
+ if (regs->cs != __KERNEL_CS)
+ goto fail;
- /* special handling not supported during early boot */
- if (handler != ex_handler_default)
- return 0;
+ /*
+ * The full exception fixup machinery is available as soon as
+ * the early IDT is loaded. This means that it is the
+ * responsibility of extable users to either function correctly
+ * when handlers are invoked early or to simply avoid causing
+ * exceptions before they're ready to handle them.
+ *
+ * This is better than filtering which handlers can be used,
+ * because refusing to call a handler here is guaranteed to
+ * result in a hard-to-debug panic.
+ *
+ * Keep in mind that not all vectors actually get here. Early
+ * fage faults, for example, are special.
+ */
+ if (fixup_exception(regs, trapnr))
+ return;
- *ip = new_ip;
- return 1;
+fail:
+ early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
+ (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
+ regs->orig_ax, read_cr2());
+
+ show_regs(regs);
+
+halt_loop:
+ while (true)
+ halt();
}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 740d7ac..14a9505 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -162,7 +162,7 @@
unsigned long ps = memparse(opt, &opt);
if (ps == PMD_SIZE) {
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (ps == PUD_SIZE && cpu_has_gbpages) {
+ } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
@@ -177,7 +177,7 @@
static __init int gigantic_pages_init(void)
{
/* With compaction or CMA we can allocate gigantic pages at runtime */
- if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
+ if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9d56f27..372aad2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -157,23 +157,23 @@
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (cpu_has_pse && !debug_pagealloc_enabled())
+ if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
/* Enable PSE if available */
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
} else
__supported_pte_mask &= ~_PAGE_GLOBAL;
/* Enable 1 GB linear kernel mappings if available: */
- if (direct_gbpages && cpu_has_gbpages) {
+ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
printk(KERN_INFO "Using GB pages for direct mapping\n");
page_size_mask |= 1 << PG_LEVEL_1G;
} else {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bd7a9b9..85af914 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -284,7 +284,7 @@
*/
mapping_iter = 1;
- if (!cpu_has_pse)
+ if (!boot_cpu_has(X86_FEATURE_PSE))
use_pse = 0;
repeat:
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 214afda..89d9747 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1295,7 +1295,7 @@
struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
@@ -1338,7 +1338,7 @@
}
get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
- if (!cpu_has_pse) {
+ if (!boot_cpu_has(X86_FEATURE_PSE)) {
next = (addr + PAGE_SIZE) & PAGE_MASK;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0d8d53d..f089491 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -378,7 +378,7 @@
int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
- return cpu_has_gbpages;
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
return 0;
#endif
@@ -386,7 +386,7 @@
int __init arch_ioremap_pmd_supported(void)
{
- return cpu_has_pse;
+ return boot_cpu_has(X86_FEATURE_PSE);
}
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 01be9ec..bbf462f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1055,7 +1055,7 @@
/*
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
- while (cpu_has_gbpages && end - start >= PUD_SIZE) {
+ while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
massage_pgprot(pud_pgprot)));
@@ -1460,7 +1460,7 @@
* error case we fall back to cpa_flush_all (which uses
* WBINVD):
*/
- if (!ret && cpu_has_clflush) {
+ if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index faec01e..fb0604f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -40,11 +40,22 @@
static bool boot_cpu_done;
static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);
-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
{
+ if (!__pat_enabled)
+ return;
+
+ if (boot_cpu_done) {
+ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+ return;
+ }
+
__pat_enabled = 0;
pr_info("x86/PAT: %s\n", reason);
+
+ init_cache_modes();
}
static int __init nopat(char *str)
@@ -181,7 +192,7 @@
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
{
enum page_cache_mode cache;
char pat_msg[33];
@@ -202,14 +213,11 @@
{
u64 tmp_pat;
- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
pat_disable("PAT not supported by CPU.");
return;
}
- if (!pat_enabled())
- goto done;
-
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT MSR is 0, disabled.");
@@ -218,16 +226,12 @@
wrmsrl(MSR_IA32_CR_PAT, pat);
-done:
- pat_init_cache_modes(pat);
+ __init_cache_modes(pat);
}
static void pat_ap_init(u64 pat)
{
- if (!pat_enabled())
- return;
-
- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
* If this happens we are on a secondary CPU, but switched to
* PAT on the boot CPU. We have no way to undo PAT.
@@ -238,18 +242,32 @@
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-void pat_init(void)
+static void init_cache_modes(void)
{
- u64 pat;
- struct cpuinfo_x86 *c = &boot_cpu_data;
+ u64 pat = 0;
+ static int init_cm_done;
- if (!pat_enabled()) {
+ if (init_cm_done)
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * CPU supports PAT. Set PAT table to be consistent with
+ * PAT MSR. This case supports "nopat" boot option, and
+ * virtual machine environments which support PAT without
+ * MTRRs. In specific, Xen has unique setup to PAT MSR.
+ *
+ * If PAT MSR returns 0, it is considered invalid and emulates
+ * as No PAT.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+ if (!pat) {
/*
* No PAT. Emulate the PAT table that corresponds to the two
- * cache bits, PWT (Write Through) and PCD (Cache Disable). This
- * setup is the same as the BIOS default setup when the system
- * has PAT but the "nopat" boot option has been specified. This
- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+ * cache bits, PWT (Write Through) and PCD (Cache Disable).
+ * This setup is also the same as the BIOS default setup.
*
* PTE encoding:
*
@@ -266,10 +284,36 @@
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ }
- } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ __init_cache_modes(pat);
+
+ init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+ u64 pat;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!pat_enabled()) {
+ init_cache_modes();
+ return;
+ }
+
+ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
/*
* PAT support with the lower four entries. Intel Pentium 2,
* 3, M, and 4 are affected by PAT errata, which makes the
@@ -734,25 +778,6 @@
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;
-#ifdef CONFIG_X86_32
- /*
- * On the PPro and successors, the MTRRs are used to set
- * memory types for physical addresses outside main memory,
- * so blindly setting UC or PWT on those pages is wrong.
- * For Pentiums and earlier, the surround logic should disable
- * caching for the high addresses through the KEN pin, but
- * we maintain the tradition of paranoia in this code.
- */
- if (!pat_enabled() &&
- !(boot_cpu_has(X86_FEATURE_MTRR) ||
- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
- pcm = _PAGE_CACHE_MODE_UC;
- }
-#endif
-
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
cachemode2protval(pcm));
return 1;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0e07e09..28c0412 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -636,7 +636,7 @@
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
- if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+ if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return 0;
/*
@@ -700,7 +700,7 @@
char *cpu_type = NULL;
int ret = 0;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
if (force_cpu_type == timer)
@@ -761,7 +761,7 @@
if (cpu_type)
break;
- if (!cpu_has_arch_perfmon)
+ if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
/* use arch perfmon as fallback */
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index d90528e..350f709 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -75,7 +75,7 @@
u64 val;
int i;
- if (cpu_has_arch_perfmon) {
+ if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index beac4df..4bd08b0 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -445,7 +445,7 @@
uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
- ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
+ ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
return;
}
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 291226b..9f14bd3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -106,7 +106,7 @@
* normal page tables.
* NOTE: We can mark everything as executable here
*/
- if (cpu_has_pse) {
+ if (boot_cpu_has(X86_FEATURE_PSE)) {
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
pfn += PTRS_PER_PTE;
} else {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 880862c..6ab6722 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,7 +75,6 @@
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
-#include <asm/pat.h>
#include <asm/cpu.h>
#ifdef CONFIG_ACPI
@@ -1093,6 +1092,26 @@
return ret;
}
+static u64 xen_read_msr(unsigned int msr)
+{
+ /*
+ * This will silently swallow a #GP from RDMSR. It may be worth
+ * changing that.
+ */
+ int err;
+
+ return xen_read_msr_safe(msr, &err);
+}
+
+static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+{
+ /*
+ * This will silently swallow a #GP from WRMSR. It may be worth
+ * changing that.
+ */
+ xen_write_msr_safe(msr, low, high);
+}
+
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1223,8 +1242,11 @@
.wbinvd = native_wbinvd,
- .read_msr = xen_read_msr_safe,
- .write_msr = xen_write_msr_safe,
+ .read_msr = xen_read_msr,
+ .write_msr = xen_write_msr,
+
+ .read_msr_safe = xen_read_msr_safe,
+ .write_msr_safe = xen_write_msr_safe,
.read_pmc = xen_read_pmc,
@@ -1469,10 +1491,10 @@
* For BSP, PSE PGE are set in probe_page_size_mask(), for APs
* set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
*/
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
cr4_set_bits_and_update_boot(X86_CR4_PSE);
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
cr4_set_bits_and_update_boot(X86_CR4_PGE);
}
@@ -1511,7 +1533,6 @@
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
- u64 pat;
int rc;
if (!xen_start_info)
@@ -1618,13 +1639,6 @@
xen_start_info->nr_pages);
xen_reserve_special_pages();
- /*
- * Modify the cache mode translation tables to match Xen's PAT
- * configuration.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- pat_init_cache_modes(pat);
-
/* keep using Xen gdt for now; no urgent need to change it */
#ifdef CONFIG_X86_32
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 0f6b229..247bfa8 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -945,7 +945,7 @@
}
#endif
#ifdef CONFIG_X86_IO_APIC
- if (cpu_has_apic) {
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
"broken in this configuration.\n");
return -ENODEV;
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 6743ff7..059f7c3 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -72,7 +72,7 @@
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
drm_cache_flush_clflush(pages, num_pages);
return;
}
@@ -105,7 +105,7 @@
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
struct sg_page_iter sg_iter;
mb();
@@ -129,7 +129,7 @@
drm_clflush_virt_range(void *addr, unsigned long length)
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
const int size = boot_cpu_data.x86_clflush_size;
void *end = addr + length;
addr = (void *)(((unsigned long)addr) & -size);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dabc089..f2cb9a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1732,7 +1732,7 @@
if (args->flags & ~(I915_MMAP_WC))
return -EINVAL;
- if (args->flags & I915_MMAP_WC && !cpu_has_pat)
+ if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
return -ENODEV;
obj = drm_gem_object_lookup(dev, file, args->handle);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1328bc5..b845f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -488,7 +488,7 @@
ret = relocate_entry_cpu(obj, reloc, target_offset);
else if (obj->map_and_fenceable)
ret = relocate_entry_gtt(obj, reloc, target_offset);
- else if (cpu_has_clflush)
+ else if (static_cpu_has(X86_FEATURE_CLFLUSH))
ret = relocate_entry_clflush(obj, reloc, target_offset);
else {
WARN_ONCE(1, "Impossible case in relocation handling\n");
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 6f8b084..3d8ff09 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -143,9 +143,9 @@
#include <linux/i8253.h>
-#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME (cpu_has_tsc?"TSC":"PIT")
+#define GET_TIME(x) do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
+#define DELTA(x,y) (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
+#define TIME_NAME (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
static unsigned int get_time_pit(void)
{
unsigned long flags;
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 8adaaea..49721b4 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -36,7 +36,7 @@
* As this gets called during crash dump, keep this simple for
* now.
*/
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(0);
}
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index adc162c..6e9042e 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -603,7 +603,7 @@
* doing this.
*/
get_online_cpus();
- if (cpu_has_pge) { /* We have a broader idea of "global". */
+ if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
/* Remember that this was originally set (for cleanup). */
cpu_had_pge = 1;
/*
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 72c9f1f..7c78307 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -635,10 +635,10 @@
#ifdef __i386__
#include <asm/msr.h>
-#define GETTICK(x) \
-({ \
- if (cpu_has_tsc) \
- x = (unsigned int)rdtsc(); \
+#define GETTICK(x) \
+({ \
+ if (boot_cpu_has(X86_FEATURE_TSC)) \
+ x = (unsigned int)rdtsc(); \
})
#else /* __i386__ */
#define GETTICK(x)
diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c
index 5fbda7b..9cf4f84 100644
--- a/drivers/staging/unisys/visorbus/visorchipset.c
+++ b/drivers/staging/unisys/visorbus/visorchipset.c
@@ -2425,7 +2425,7 @@
{
unsigned int eax, ebx, ecx, edx;
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
/* check the ID */
cpuid(UNISYS_SPAR_LEAF_ID, &eax, &ebx, &ecx, &edx);
return (ebx == UNISYS_SPAR_ID_EBX) &&
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 339125b..6a67ab9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -245,7 +245,9 @@
#define INIT_TASK_DATA(align) \
. = ALIGN(align); \
- *(.data..init_task)
+ VMLINUX_SYMBOL(__start_init_task) = .; \
+ *(.data..init_task) \
+ VMLINUX_SYMBOL(__end_init_task) = .;
/*
* Read only Data
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b47ebd1..c73425de 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -9,6 +9,7 @@
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 0000000..5b2b4b3
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,398 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!want_segv) {
+ clearhandler(SIGSEGV);
+ return; /* Crash cleanly. */
+ }
+
+ want_segv = false;
+ segv_addr = (unsigned long)si->si_addr;
+
+ ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+ unsigned long offset;
+ /*
+ * Unless we have FSGSBASE, there's no direct way to do this from
+ * user mode. We can get at it indirectly using signals, though.
+ */
+
+ want_segv = true;
+
+ offset = 0;
+ if (which == FS) {
+ /* Use a constant-length instruction here. */
+ asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ /*
+ * If that didn't segfault, try the other end of the address space.
+ * Unless we get really unlucky and run into the vsyscall page, this
+ * is guaranteed to segfault.
+ */
+
+ offset = (ULONG_MAX >> 1) + 1;
+ if (which == FS) {
+ asm volatile ("mov %%fs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+ unsigned long base;
+ unsigned short sel;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+ err(1, "ARCH_SET_GS");
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ base = read_base(GS);
+ if (base == value) {
+ printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == value) {
+ printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+ unsigned long base, arch_base;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (schedule)
+ usleep(10);
+
+ asm volatile ("mov %0, %%gs" : : "rm" (0));
+ base = read_base(GS);
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == arch_base) {
+ printf("[OK]\tGSBASE is 0x%lx\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+ }
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+ unsigned long to_set = remote_base;
+ bool hard_zero = false;
+ if (to_set == HARD_ZERO) {
+ to_set = 0;
+ hard_zero = true;
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ unsigned short sel;
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+ to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+ /*
+ * The goal here is to try to arrange for GS == 0, GSBASE !=
+ * 0, and for the the kernel the think that GSBASE == 0.
+ *
+ * To make the test as reliable as possible, this uses
+ * explicit descriptorss. (This is not the only way. This
+ * could use ARCH_SET_GS with a low, nonzero base, but the
+ * relevant side effect of ARCH_SET_GS could change.)
+ */
+
+ /* Step 1: tell the kernel that we have GSBASE == 0. */
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+
+ /* Step 2: change GSBASE without telling the kernel. */
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0xBAADF00D,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tother thread: using LDT slot 0\n");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "flags");
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+ return;
+ }
+ printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+ }
+
+ /*
+ * Step 3: set the selector back to zero. On AMD chips, this will
+ * preserve GSBASE.
+ */
+
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+ while (1) {
+ while (ftx == 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ if (ftx == 3)
+ return NULL;
+
+ if (ftx == 1)
+ do_remote_base();
+ else if (ftx == 2)
+ do_unexpected_base();
+ else
+ errx(1, "helper thread got bad command");
+
+ ftx = 0;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ }
+}
+
+static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
+{
+ unsigned long base;
+
+ bool hard_zero = false;
+ if (local == HARD_ZERO) {
+ hard_zero = true;
+ local = 0;
+ }
+
+ printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+ local, hard_zero ? " and clear gs" : "", remote);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+ err(1, "ARCH_SET_GS");
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ if (read_base(GS) != local) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE wasn't set as expected\n");
+ }
+
+ remote_base = remote;
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == local) {
+ printf("[OK]\tGSBASE remained 0x%lx\n", local);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+static void test_unexpected_base(void)
+{
+ unsigned long base;
+
+ printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ ftx = 2;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == 0) {
+ printf("[OK]\tGSBASE remained 0\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+int main()
+{
+ pthread_t thread;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ check_gs_value(0);
+ check_gs_value(1);
+ check_gs_value(0x200000000);
+ check_gs_value(0);
+ check_gs_value(0x200000000);
+ check_gs_value(1);
+
+ for (int sched = 0; sched < 2; sched++) {
+ mov_0_gs(0, !!sched);
+ mov_0_gs(1, !!sched);
+ mov_0_gs(0x200000000, !!sched);
+ }
+
+ /* Set up for multithreading. */
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0"); /* should never fail */
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ static unsigned long bases_with_hard_zero[] = {
+ 0, HARD_ZERO, 1, 0x200000000,
+ };
+
+ for (int local = 0; local < 4; local++) {
+ for (int remote = 0; remote < 4; remote++) {
+ set_gs_and_switch_to(bases_with_hard_zero[local],
+ bases_with_hard_zero[remote]);
+ }
+ }
+
+ test_unexpected_base();
+
+ ftx = 3; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 31a3035..4af4707 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -21,6 +21,9 @@
#include <pthread.h>
#include <sched.h>
#include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
#define AR_ACCESSED (1<<8)
@@ -44,6 +47,12 @@
static int nerrs;
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */
+static int gdt_entry_num;
+
static void check_invalid_segment(uint16_t index, int ldt)
{
uint32_t has_limit = 0, has_ar = 0, limit, ar;
@@ -561,16 +570,257 @@
}
}
+static void setup_counter_page(void)
+{
+ unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (page == MAP_FAILED)
+ err(1, "mmap");
+
+ for (int i = 0; i < 1024; i++)
+ page[i] = i;
+ counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+ int ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (low_user_desc) :
+ "a" (243), "b" (low_user_desc)
+ : "flags");
+ return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+ low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (low_user_desc == MAP_FAILED)
+ err(1, "mmap");
+
+ low_user_desc->entry_number = -1;
+ low_user_desc->base_addr = (unsigned long)&counter_page[1];
+ low_user_desc->limit = 0xfffff;
+ low_user_desc->seg_32bit = 1;
+ low_user_desc->contents = 0; /* Data, grow-up*/
+ low_user_desc->read_exec_only = 0;
+ low_user_desc->limit_in_pages = 1;
+ low_user_desc->seg_not_present = 0;
+ low_user_desc->useable = 0;
+
+ if (invoke_set_thread_area() == 0) {
+ gdt_entry_num = low_user_desc->entry_number;
+ printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+ } else {
+ printf("[NOTE]\tset_thread_area is unavailable\n");
+ }
+
+ low_user_desc_clear = low_user_desc + 1;
+ low_user_desc_clear->entry_number = gdt_entry_num;
+ low_user_desc_clear->read_exec_only = 1;
+ low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+ if (!gdt_entry_num)
+ return; /* 64-bit only system -- we can't use set_thread_area */
+
+ unsigned short prev_sel;
+ unsigned short sel;
+ unsigned int eax;
+ const char *result;
+#ifdef __x86_64__
+ unsigned long saved_base;
+ unsigned long new_base;
+#endif
+
+ /* Test DS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%ds, %[prev_sel]\n\t"
+ "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate ds */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%ds, %[sel]\n\t"
+ "movw %[prev_sel], %%ds"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+ result, sel);
+
+ /* Test ES */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%es, %[prev_sel]\n\t"
+ "movw %[sel], %%es\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate es */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%es, %[sel]\n\t"
+ "movw %[prev_sel], %%es"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+ result, sel);
+
+ /* Test FS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+ asm volatile ("movw %%fs, %[prev_sel]\n\t"
+ "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate fs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%fs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+ /* Restore FS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew FSBASE was zero\n");
+ }
+#endif
+
+ /* Test GS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+ asm volatile ("movw %%gs, %[prev_sel]\n\t"
+ "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate gs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%gs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+ /* Restore GS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew GSBASE was zero\n");
+ }
+#endif
+}
+
int main(int argc, char **argv)
{
if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
return finish_exec_test();
+ setup_counter_page();
+ setup_low_user_desc();
+
do_simple_tests();
do_multicpu_tests();
do_exec_test();
+ test_gdt_invalidation();
+
return nerrs ? 1 : 0;
}