Merge commit 'upstream-x86-virt' into WIP.x86/mm
Merge a minimal set of virt cleanups, as a base for the MM isolation patches.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/.mailmap b/.mailmap
index 4757d36..c021f29 100644
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,7 @@
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
+Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Mark Brown <broonie@sirena.org.uk>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4..cd4b29b 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@
Overview
--------
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the
format of the ORC data is much simpler than DWARF, which in turn allows
the ORC unwinder to be much simpler and faster.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e2..3448e67 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
... unused hole ...
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
diff --git a/Makefile b/Makefile
index bee2033..9a43f19 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
NAME = Fearless Coyote
# *DOCUMENTATION*
@@ -934,8 +934,8 @@
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
- ifdef CONFIG_ORC_UNWINDER
- $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ ifdef CONFIG_UNWINDER_ORC
+ $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
else
$(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index caf86be..4052ec3 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -51,6 +51,13 @@
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
+
+#ifdef CONFIG_ACPI_APEI_GHES
+ /* Used for GHES mapping from assorted contexts */
+ FIX_APEI_GHES_IRQ,
+ FIX_APEI_GHES_NMI,
+#endif /* CONFIG_ACPI_APEI_GHES */
+
__end_of_permanent_fixed_addresses,
/*
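Note on the two new GHES slots: the GHES driver (drivers/acpi/apei/ghes.c) needs a pre-reserved virtual address to map error pages into, because ioremap() cannot be called from IRQ or NMI-like contexts. A hypothetical sketch of how such a slot might be used, assuming only the generic fixmap helpers __set_fixmap() and fix_to_virt() (not the actual driver code):

	/* Hypothetical sketch: map an error page through the dedicated NMI
	 * slot and return a pointer at the requested offset. */
	static void __iomem *ghes_fixmap_nmi(phys_addr_t paddr)
	{
		__set_fixmap(FIX_APEI_GHES_NMI, paddr & PAGE_MASK, FIXMAP_PAGE_IO);
		return (void __iomem *)(fix_to_virt(FIX_APEI_GHES_NMI) +
					(paddr & ~PAGE_MASK));
	}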
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index df7acea..4674f1e 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -575,6 +575,7 @@
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
+ uart_port.flags = UPF_FIXED_TYPE;
uart_port.regshift = 2;
uart_port.line = 0;
@@ -653,6 +654,10 @@
u32 val;
int res;
+ res = ar7_gpio_init();
+ if (res)
+ pr_warn("unable to register gpios: %d\n", res);
+
res = ar7_register_uarts();
if (res)
pr_err("unable to setup uart(s): %d\n", res);
diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c
index 4fd8333..dd53987 100644
--- a/arch/mips/ar7/prom.c
+++ b/arch/mips/ar7/prom.c
@@ -246,8 +246,6 @@
ar7_init_cmdline(fw_arg0, (char **)fw_arg1);
ar7_init_env((struct env_var *)fw_arg2);
console_config();
-
- ar7_gpio_init();
}
#define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4)))
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 406072e..87dcac2 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -591,11 +591,11 @@
/* Flush and enable RAC */
cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
- __raw_writel(cfg | 0x100, BMIPS_RAC_CONFIG);
+ __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
__raw_readl(cbr + BMIPS_RAC_CONFIG);
cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
- __raw_writel(cfg | 0xf, BMIPS_RAC_CONFIG);
+ __raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG);
__raw_readl(cbr + BMIPS_RAC_CONFIG);
cfg = __raw_readl(cbr + BMIPS_RAC_ADDRESS_RANGE);
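For clarity, the bug fixed here: the two writes passed the bare BMIPS_RAC_CONFIG offset as the target address instead of an offset from the ioremapped cbr base, so the flush/enable bits were written to the wrong address. A minimal sketch of the corrected read-modify-write sequence, using only names from the hunk above:

	static void bmips_rac_flush_enable(void __iomem *cbr)
	{
		u32 cfg;

		/* Flush and enable RAC; every access is cbr-relative. */
		cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
		__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
		__raw_readl(cbr + BMIPS_RAC_CONFIG);	/* read back to post the write */

		cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
		__raw_writel(cfg | 0xf, cbr + BMIPS_RAC_CONFIG);
		__raw_readl(cbr + BMIPS_RAC_CONFIG);
	}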
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7c62967..59247af 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -646,6 +646,16 @@
hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
hnow_r = hpte_new_to_old_r(hnow_r);
}
+
+ /*
+ * If the HPT is being resized, don't update the HPTE,
+ * instead let the guest retry after the resize operation is complete.
+ * The synchronization for hpte_setup_done test vs. set is provided
+ * by the HPTE lock.
+ */
+ if (!kvm->arch.hpte_setup_done)
+ goto out_unlock;
+
if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 73bf1eb..8d43cf2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2705,11 +2705,14 @@
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
+ * If the hpte_setup_done flag has been cleared, don't go into the
+ * guest because that means a HPT resize operation is in progress.
*/
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
- recheck_signals(&core_info)) {
+ recheck_signals(&core_info) ||
+ (!kvm_is_radix(vc->kvm) && !vc->kvm->arch.hpte_setup_done)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
@@ -3078,7 +3081,7 @@
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
- int n_ceded, i;
+ int n_ceded, i, r;
struct kvmppc_vcore *vc;
struct kvm_vcpu *v;
@@ -3132,6 +3135,20 @@
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
!signal_pending(current)) {
+ /* See if the HPT and VRMA are ready to go */
+ if (!kvm_is_radix(vcpu->kvm) &&
+ !vcpu->kvm->arch.hpte_setup_done) {
+ spin_unlock(&vc->lock);
+ r = kvmppc_hv_setup_htab_rma(vcpu);
+ spin_lock(&vc->lock);
+ if (r) {
+ kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ kvm_run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ break;
+ }
+ }
+
if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
kvmppc_vcore_end_preempt(vc);
@@ -3249,13 +3266,6 @@
/* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
smp_mb();
- /* On the first time here, set up HTAB and VRMA */
- if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) {
- r = kvmppc_hv_setup_htab_rma(vcpu);
- if (r)
- goto out;
- }
-
flush_all_to_thread(current);
/* Save userspace EBB and other register values */
@@ -3303,7 +3313,6 @@
}
mtspr(SPRN_VRSAVE, user_vrsave);
- out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
return r;
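The reasoning behind these hunks: the removed code in the outer run path ran only "on the first time here", so an HPT resize that clears hpte_setup_done after the first entry would never trigger a re-setup. The new scheme checks the flag at three points: the page-fault path bails out and lets the guest retry, the vcpu run loop redoes HTAB/VRMA setup before each entry, and kvmppc_run_core() re-checks with interrupts hard-disabled so a resize cannot slip in between the check and guest entry. A condensed sketch of the reader side, using the fields from the hunks above (fail_entry() is a hypothetical shorthand for the KVM_EXIT_FAIL_ENTRY handling shown in the diff):

	/* Reader side, re-checked on every run-loop iteration (condensed): */
	if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) {
		spin_unlock(&vc->lock);
		r = kvmppc_hv_setup_htab_rma(vcpu);	/* rebuild HTAB + VRMA */
		spin_lock(&vc->lock);
		if (r)
			return fail_entry(kvm_run, vcpu, r);	/* hypothetical helper */
	}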
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb233..4ae940a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,7 +171,7 @@
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
@@ -303,7 +303,6 @@
config KASAN_SHADOW_OFFSET
hex
depends on KASAN
- default 0xdff8000000000000 if X86_5LEVEL
default 0xdffffc0000000000
config HAVE_INTEL_TXT
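Context for the removed X86_5LEVEL default: KASAN maps every 8 bytes of address space to one shadow byte via shadow = (addr >> 3) + KASAN_SHADOW_OFFSET, and the remaining single offset serves both paging modes. A worked example with 4-level addresses:

	(0xffff800000000000 >> 3) + 0xdffffc0000000000
	  = 0x1ffff00000000000   + 0xdffffc0000000000
	  = 0xffffec0000000000

i.e. the shadow of the base of the kernel half lands exactly at the documented start of the 4-level shadow region (ffffec0000000000 - fffffc0000000000).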
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 90b1230..6293a87 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -359,28 +359,14 @@
choice
prompt "Choose kernel unwinder"
- default FRAME_POINTER_UNWINDER
+ default UNWINDER_ORC if X86_64
+ default UNWINDER_FRAME_POINTER if X86_32
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
livepatch, lockdep, and more.
-config FRAME_POINTER_UNWINDER
- bool "Frame pointer unwinder"
- select FRAME_POINTER
- ---help---
- This option enables the frame pointer unwinder for unwinding kernel
- stack traces.
-
- The unwinder itself is fast and it uses less RAM than the ORC
- unwinder, but the kernel text size will grow by ~3% and the kernel's
- overall performance will degrade by roughly 5-10%.
-
- This option is recommended if you want to use the livepatch
- consistency model, as this is currently the only way to get a
- reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
-config ORC_UNWINDER
+config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
select STACK_VALIDATION
@@ -396,7 +382,22 @@
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.
-config GUESS_UNWINDER
+config UNWINDER_FRAME_POINTER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
---help---
@@ -411,7 +412,7 @@
endchoice
config FRAME_POINTER
- depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ depends on !UNWINDER_ORC && !UNWINDER_GUESS
bool
endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd50..66c9e2a 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e..e32fc1f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_ORC=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 6e16003..3fd8bc5 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -142,56 +142,25 @@
UNWIND_HINT_REGS offset=\offset
.endm
- .macro RESTORE_EXTRA_REGS offset=0
- movq 0*8+\offset(%rsp), %r15
- movq 1*8+\offset(%rsp), %r14
- movq 2*8+\offset(%rsp), %r13
- movq 3*8+\offset(%rsp), %r12
- movq 4*8+\offset(%rsp), %rbp
- movq 5*8+\offset(%rsp), %rbx
- UNWIND_HINT_REGS offset=\offset extra=0
+ .macro POP_EXTRA_REGS
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
.endm
- .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
- .if \rstor_r11
- movq 6*8(%rsp), %r11
- .endif
- .if \rstor_r8910
- movq 7*8(%rsp), %r10
- movq 8*8(%rsp), %r9
- movq 9*8(%rsp), %r8
- .endif
- .if \rstor_rax
- movq 10*8(%rsp), %rax
- .endif
- .if \rstor_rcx
- movq 11*8(%rsp), %rcx
- .endif
- .if \rstor_rdx
- movq 12*8(%rsp), %rdx
- .endif
- movq 13*8(%rsp), %rsi
- movq 14*8(%rsp), %rdi
- UNWIND_HINT_IRET_REGS offset=16*8
- .endm
- .macro RESTORE_C_REGS
- RESTORE_C_REGS_HELPER 1,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_C_REGS_HELPER 0,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX
- RESTORE_C_REGS_HELPER 1,0,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_R11
- RESTORE_C_REGS_HELPER 1,1,0,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX_R11
- RESTORE_C_REGS_HELPER 1,0,0,1,1
- .endm
-
- .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
- subq $-(15*8+\addskip), %rsp
+ .macro POP_C_REGS
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
.endm
.macro icebp
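For reference, the pop order in the two new macros simply walks struct pt_regs from the lowest address (top of stack) upward; illustrative layout:

	r15 r14 r13 r12 rbp rbx                 <- POP_EXTRA_REGS
	r11 r10 r9  r8  rax rcx rdx rsi rdi     <- POP_C_REGS
	orig_ax                                 <- skipped with addq $8, %rsp
	RIP CS RFLAGS RSP SS                    <- hardware iret frame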
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bcfc566..a2b30ec 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -221,10 +221,9 @@
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
+ addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
- USERGS_SYSRET64
+ jmp .Lpop_c_regs_except_rcx_r11_and_sysret
1:
/*
@@ -246,17 +245,18 @@
call do_syscall_64 /* returns with IRQs disabled */
return_from_SYSCALL_64:
- RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
/*
* Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
+ * a completely clean 64-bit userspace context. If we're not,
+ * go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
- cmpq %rcx, %r11 /* RCX == RIP */
- jne opportunistic_sysret_failed
+
+ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -274,14 +274,14 @@
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -302,12 +302,12 @@
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ jnz swapgs_restore_regs_and_return_to_usermode
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
@@ -315,14 +315,20 @@
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
+ POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
+ popq %rsi /* skip r11 */
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rsi /* skip rcx */
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET64
-
-opportunistic_sysret_failed:
- SWAPGS
- jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
ENTRY(stub_ptregs_64)
@@ -423,8 +429,7 @@
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
1:
/* kernel thread */
@@ -612,8 +617,21 @@
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
+
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testb $3, CS(%rsp)
+ jnz 1f
+ ud2
+1:
+#endif
SWAPGS
- jmp restore_regs_and_iret
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
+ INTERRUPT_RETURN
+
/* Returning to kernel space */
retint_kernel:
@@ -633,15 +651,17 @@
*/
TRACE_IRQS_IRETQ
-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
- RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates kernel mode. */
+ testb $3, CS(%rsp)
+ jz 1f
+ ud2
+1:
+#endif
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN
ENTRY(native_iret)
@@ -818,7 +838,7 @@
ASM_CLAC
- .ifeq \has_error_code
+ .if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
@@ -1059,6 +1079,7 @@
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
+idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
#endif
@@ -1112,17 +1133,14 @@
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
+ jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
+ jmp .Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
- INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+ jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
@@ -1223,10 +1241,13 @@
jmp retint_user
END(error_exit)
-/* Runs on exception stack */
-/* XXX: broken on Xen PV */
+/*
+ * Runs on exception stack. Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1284,7 +1305,7 @@
* stacks lest we corrupt the "NMI executing" variable.
*/
- SWAPGS_UNSAFE_STACK
+ swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1328,8 +1349,7 @@
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
.Lnmi_from_kernel:
/*
@@ -1450,7 +1470,7 @@
popq %rdx
/* We are returning to kernel mode, so this cannot result in a fault. */
- INTERRUPT_RETURN
+ iretq
first_nmi:
/* Restore rdx. */
@@ -1481,7 +1501,7 @@
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
- INTERRUPT_RETURN /* continues at repeat_nmi below */
+ iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1544,29 +1564,34 @@
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
+ POP_EXTRA_REGS
+ POP_C_REGS
- /* Point RSP at the "iret" frame. */
- REMOVE_PT_GPREGS_FROM_STACK 6*8
+ /*
+ * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
+ * frame.
+ */
+ addq $6*8, %rsp
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
- * the SYSCALL entry and exit paths. On a native kernel, we
- * could just inspect RIP, but, on paravirt kernels,
- * INTERRUPT_RETURN can translate into a jump into a
- * hypercall page.
+ * the SYSCALL entry and exit paths.
+ *
+ * We arguably should just inspect RIP instead, but I (Andy) wrote
+ * this code when I had the misapprehension that Xen PV supported
+ * NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
- * stack in a single instruction. We are returning to kernel
- * mode, so this cannot result in a fault.
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+ * about espfix64 on the way back to kernel mode.
*/
- INTERRUPT_RETURN
+ iretq
END(nmi)
ENTRY(ignore_sysret)
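A note on the new CONFIG_DEBUG_ENTRY assertions above: the low two bits of the saved CS selector hold the RPL, which in a saved exception/interrupt frame equals the CPL of the interrupted context, 0 for kernel and 3 for user. The same test rendered in C (illustrative only):

	bool return_to_user = (regs->cs & 3) != 0;	/* testb $3, CS(%rsp) */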
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index b5c7a56..568e130 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,8 +337,7 @@
/* Go back to user mode. */
TRACE_IRQS_ON
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
ENTRY(stub32_clone)
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 331f1dc..6fb9b57 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-out := $(obj)/../../include/generated/asm
-uapi := $(obj)/../../include/generated/uapi/asm
+out := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
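For x86 builds ($(SRCARCH) = x86), the rewritten paths resolve to:

	out  = arch/x86/include/generated/asm
	uapi = arch/x86/include/generated/uapi/asm

the same directories the old spelling reached, but without the relative hops (for this Makefile, $(obj) is arch/x86/entry/syscalls).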
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579a..3ac991d 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_LONG "\n\t"
+ asm volatile(RDRAND_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
@@ -59,7 +59,7 @@
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_INT "\n\t"
+ asm volatile(RDRAND_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
@@ -71,7 +71,7 @@
static inline bool rdseed_long(unsigned long *v)
{
bool ok;
- asm volatile(RDSEED_LONG "\n\t"
+ asm volatile(RDSEED_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
@@ -80,7 +80,7 @@
static inline bool rdseed_int(unsigned int *v)
{
bool ok;
- asm volatile(RDSEED_INT "\n\t"
+ asm volatile(RDSEED_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
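The removed "\n\t" separators are redundant because every CC_SET() variant already begins its expansion with a newline; dropping them likely also helps GCC's inline-size heuristics, which count asm template lines. A standalone user-space sketch of the same retry pattern (assumes an x86-64 GCC with flag-output support; the kernel's RDRAND_LONG is an alternatives-patched opcode string, not used here):

	#include <stdbool.h>
	#include <stdio.h>

	#define RDRAND_RETRY_LOOPS 10	/* same retry budget the header uses */

	static inline bool rdrand_long(unsigned long *v)
	{
		bool ok;
		unsigned int retry = RDRAND_RETRY_LOOPS;

		do {
			/* "=@ccc" is what CC_OUT(c) expands to: CF=1 means success */
			asm volatile("rdrand %1"
				     : "=@ccc" (ok), "=r" (*v));
			if (ok)
				return true;
		} while (--retry);

		return false;
	}

	int main(void)
	{
		unsigned long v;

		if (rdrand_long(&v))
			printf("%#lx\n", v);
		return 0;
	}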
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2bcf473..3fa0398 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -143,7 +143,7 @@
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+ asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), ADDR
: "ir" ((char) ~(1 << nr)) : "memory");
@@ -246,7 +246,7 @@
{
bool oldbit;
- asm("bts %2,%1\n\t"
+ asm("bts %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -286,7 +286,7 @@
{
bool oldbit;
- asm volatile("btr %2,%1\n\t"
+ asm volatile("btr %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -298,7 +298,7 @@
{
bool oldbit;
- asm volatile("btc %2,%1\n\t"
+ asm volatile("btc %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -329,7 +329,7 @@
{
bool oldbit;
- asm volatile("bt %2,%1\n\t"
+ asm volatile("bt %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9eef9cc..a600a6c 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -7,6 +7,7 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/user32.h>
#include <asm/unistd.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0dfa684..bf6a762 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -126,11 +126,10 @@
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
- clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
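clear_cpu_cap() and setup_clear_cpu_cap() become out-of-line functions because they now walk a feature-dependency table (the kernel/cpu/cpuid-deps.c referenced in the cpufeatures.h comment below), clearing every feature that depends on the one being cleared. An illustrative fragment of such a table (entries here are examples, not the full upstream list):

	struct cpuid_dep {
		unsigned int feature;
		unsigned int depends;
	};

	static const struct cpuid_dep cpuid_deps[] = {
		{ X86_FEATURE_XSAVEOPT,		X86_FEATURE_XSAVE },
		{ X86_FEATURE_XSAVEC,		X86_FEATURE_XSAVE },
		{ X86_FEATURE_AVX2,		X86_FEATURE_AVX   },
		{ X86_FEATURE_AVX512F,		X86_FEATURE_AVX2  },
		{}					/* table terminator */
	};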
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 793690f..cdf5be8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,173 +13,176 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
+#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
* in /proc/cpuinfo instead of the macro name. If the string is "",
* this feature bit is not displayed in /proc/cpuinfo at all.
+ *
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
*/
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
- /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+/* CPU types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -187,146 +190,153 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */
+#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
/*
* BUG word(s)
*/
-#define X86_BUG(x) (NCAPINTS*32 + (x))
+#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among those affected by Erratum 400 */
+
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index c1a125e4..3a091ce 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -253,7 +253,7 @@
* space open for things that want to use the area for 32-bit pointers.
*/
#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
- (TASK_SIZE / 3 * 2))
+ (DEFAULT_MAP_WINDOW / 3 * 2))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. This could be done in user space,
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index dcd9fb5..b0c505f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -104,6 +104,12 @@
FIX_GDT_REMAP_BEGIN,
FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+#ifdef CONFIG_ACPI_APEI_GHES
+ /* Used for GHES mapping from assorted contexts */
+ FIX_APEI_GHES_IRQ,
+ FIX_APEI_GHES_NMI,
+#endif
+
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 8546faf..7948a17 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -6,7 +6,7 @@
#include <asm/orc_types.h>
struct mod_arch_specific {
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fd81228..283efca 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -16,10 +16,9 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
+ PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
}
/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 10cc3b9..6ec54d0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -134,7 +134,7 @@
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+ void (*load_sp0)(unsigned long sp0);
void (*set_iopl_mask)(unsigned mask);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 377f1ff..ba3c523 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1\n\t"
+ asm volatile("bt "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 59df7b4..9e9b05f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -200,10 +200,9 @@
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
_PAGE_DIRTY | _PAGE_ENC)
+#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bdac19a..2db7cf7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -431,7 +431,9 @@
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+#ifdef CONFIG_X86_32
unsigned long sp0;
+#endif
unsigned long sp;
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
@@ -518,16 +520,9 @@
}
static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
{
- tss->x86_tss.sp0 = thread->sp0;
-#ifdef CONFIG_X86_32
- /* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
- tss->x86_tss.ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-#endif
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
static inline void native_swapgs(void)
@@ -547,15 +542,20 @@
#endif
}
+static inline bool on_thread_stack(void)
+{
+ return (unsigned long)(current_top_of_stack() -
+ current_stack_pointer) < THREAD_SIZE;
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- native_load_sp0(tss, thread);
+ native_load_sp0(sp0);
}
#define set_iopl_mask native_set_iopl_mask
@@ -804,6 +804,15 @@
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+
+#define task_pt_regs(task) \
+({ \
+ unsigned long __ptr = (unsigned long)task_stack_page(task); \
+ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
+ ((struct pt_regs *)__ptr) - 1; \
+})
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@@ -823,23 +832,6 @@
.addr_limit = KERNEL_DS, \
}
-/*
- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessible even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the ss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task) \
-({ \
- unsigned long __ptr = (unsigned long)task_stack_page(task); \
- __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
- ((struct pt_regs *)__ptr) - 1; \
-})
-
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
@@ -873,11 +865,9 @@
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
- .sp0 = TOP_OF_INIT_STACK, \
.addr_limit = KERNEL_DS, \
}
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
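With task_pt_regs() now shared between 32-bit and 64-bit, the computation it performs can be sketched as plain C (illustrative only; the example_ name is not part of the patch):

    /* Sketch: pt_regs sits at the top of the kernel stack, just below
     * the TOP_OF_KERNEL_STACK_PADDING reserved bytes. */
    static inline struct pt_regs *example_task_pt_regs(struct task_struct *task)
    {
    	unsigned long top = (unsigned long)task_stack_page(task) + THREAD_SIZE;

    	top -= TOP_OF_KERNEL_STACK_PADDING;	/* skip reserved padding */
    	return (struct pt_regs *)top - 1;	/* regs live just below it */
    }

task_top_of_stack() is then simply task_pt_regs(task) + 1, the address just above the saved registers.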
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index c0e3c45..14131dd 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -136,9 +136,9 @@
#endif
}
-#ifdef CONFIG_X86_64
static inline bool user_64bit_mode(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
#ifndef CONFIG_PARAVIRT
/*
* On non-paravirt systems, this is the only long mode CPL 3
@@ -149,8 +149,12 @@
/* Headers are too twisted for this to go in paravirt.h. */
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
#endif
+#else /* !CONFIG_X86_64 */
+ return false;
+#endif
}
+#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index d8f3a6a..f91c365 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@
#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
bool c; \
- asm volatile (fullop ";" CC_SET(cc) \
+ asm volatile (fullop CC_SET(cc) \
: [counter] "+m" (var), CC_OUT(cc) (c) \
: __VA_ARGS__ : clobbers); \
return c; \
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 899084b..8c6bd68 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_SWITCH_TO_H
#define _ASM_X86_SWITCH_TO_H
+#include <linux/sched/task_stack.h>
+
struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -73,4 +75,26 @@
((last) = __switch_to_asm((prev), (next))); \
} while (0)
+#ifdef CONFIG_X86_32
+static inline void refresh_sysenter_cs(struct thread_struct *thread)
+{
+ /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+ if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ return;
+
+ this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+}
+#endif
+
+/* This is used when switching tasks or entering/exiting vm86 mode. */
+static inline void update_sp0(struct task_struct *task)
+{
+#ifdef CONFIG_X86_32
+ load_sp0(task->thread.sp0);
+#else
+ load_sp0(task_top_of_stack(task));
+#endif
+}
+
#endif /* _ASM_X86_SWITCH_TO_H */
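Taken together, the intended call pattern during a context switch is roughly the following (a sketch only; the actual call sites are in process_32.c and process_64.c further down in this series):

    /* Sketch of a 32-bit __switch_to() fragment using the new helpers: */
    update_sp0(next_p);				/* TSS sp0 -> next task's stack */
    refresh_sysenter_cs(&next_p->thread);	/* prev or next may be vm86 */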
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcaf..bad25bb 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@
asmlinkage long sys_iopl(unsigned int);
/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index fa60398..069c04b 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -34,11 +34,6 @@
)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
@@ -74,11 +69,6 @@
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b0cced9..1fadd31 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -38,9 +38,9 @@
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
asmlinkage void xen_xenint3(void);
-asmlinkage void xen_nmi(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
@@ -145,4 +145,22 @@
X86_TRAP_IRET = 32, /* 32, IRET Exception */
};
+/*
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
+ * bit 5 == 1: protection keys block access
+ */
+enum x86_pf_error_code {
+ X86_PF_PROT = 1 << 0,
+ X86_PF_WRITE = 1 << 1,
+ X86_PF_USER = 1 << 2,
+ X86_PF_RSVD = 1 << 3,
+ X86_PF_INSTR = 1 << 4,
+ X86_PF_PK = 1 << 5,
+};
#endif /* _ASM_X86_TRAPS_H */
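As a worked example of the bit definitions above (values follow directly from the enum; variable names are illustrative):

    /* A user-mode write that hits a protection fault sets bits 0..2: */
    unsigned long ec = X86_PF_PROT | X86_PF_WRITE | X86_PF_USER; /* == 0x7 */
    bool from_user = ec & X86_PF_USER;	/* true: fault raised at CPL 3 */
    bool was_write = ec & X86_PF_WRITE;	/* true: write, not read */
    bool present   = ec & X86_PF_PROT;	/* true: page was present */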
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 87adc0d..e9cc6fe 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,11 +13,11 @@
struct task_struct *task;
int graph_idx;
bool error;
-#if defined(CONFIG_ORC_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
struct pt_regs *regs;
@@ -51,7 +51,7 @@
__unwind_start(state, task, regs, first_frame);
}
-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
@@ -66,7 +66,7 @@
}
#endif
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
void unwind_init(void);
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
void *orc, size_t orc_size);
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 6f33553..53b4ca5 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -152,5 +152,8 @@
#define CX86_ARR_BASE 0xc4
#define CX86_RCR_BASE 0xdc
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)
#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5f70044..295abaa 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,9 +25,9 @@
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -128,9 +128,9 @@
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
-obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 236999c..90cb82db 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -22,7 +22,8 @@
obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
-obj-y += aperfmperf.o
+obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
+obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 957813e..0ee8332 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -42,6 +42,10 @@
s64 time_delta = ktime_ms_delta(now, s->time);
unsigned long flags;
+ /* Don't bother re-computing within the cache threshold time. */
+ if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+ return;
+
local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
@@ -70,7 +74,6 @@
unsigned int arch_freq_get_on_cpu(int cpu)
{
- s64 time_delta;
unsigned int khz;
if (!cpu_khz)
@@ -79,12 +82,6 @@
if (!static_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
- /* Don't bother re-computing within the cache threshold time. */
- time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
- khz = per_cpu(samples.khz, cpu);
- if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
- return khz;
-
smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
khz = per_cpu(samples.khz, cpu);
if (khz)
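The net effect is a simple TTL cache evaluated on the target CPU: the cross-CPU call is always issued, but the remote handler bails out early when the last sample is fresh. Sketched generically (recompute_khz() is a hypothetical stand-in for the APERF/MPERF arithmetic):

    struct sample { unsigned int khz; s64 time_ms; };

    static void snapshot(struct sample *s, s64 now_ms)
    {
    	if (now_ms - s->time_ms < APERFMPERF_CACHE_THRESHOLD_MS)
    		return;			/* cached khz still fresh */
    	s->khz     = recompute_khz();	/* hypothetical helper */
    	s->time_ms = now_ms;
    }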
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176ba..cdf79ab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1301,18 +1301,16 @@
pr_cont(")\n");
}
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around, otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1570,13 @@
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, me);
- load_sp0(t, &current->thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
clear_all_debug_regs();
@@ -1596,7 +1598,6 @@
int cpu = smp_processor_id();
struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu);
@@ -1627,9 +1628,13 @@
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, curr);
- load_sp0(t, thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 0000000..904b0a3c4
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features that are part of the base set (like FPU).
+ *
+ * Note that none of this is __init / __initdata, because it can be
+ * called from cpu hotplug. It shouldn't do anything in that case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2 },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ /*
+ * Note: This could use the non-atomic __*_bit() variants, but the
+ * rest of the cpufeature code uses atomics as well, so keep it for
+ * consistency. Clean up all of it separately.
+ */
+ if (!c) {
+ clear_cpu_cap(&boot_cpu_data, feature);
+ set_bit(feature, (unsigned long *)cpu_caps_cleared);
+ } else {
+ clear_bit(feature, (unsigned long *)c->x86_capability);
+ }
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+ const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
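To illustrate the fixpoint loop in do_clear_cpu_cap(), here is how clearing XSAVE propagates through the table above (a comment-style walkthrough, not additional kernel code):

    /*
     * Pass 1: XSAVE is in 'disable' -> XSAVEOPT, XSAVEC, XSAVES, AVX,
     *         PKU, MPX and XGETBV1 are set in 'disable' and cleared.
     * Pass 2: AVX is now set -> FMA, AVX2 and AVX512F join.
     * Pass 3: AVX512F is now set -> every AVX512* sub-feature joins.
     * Pass 4: nothing new flips, 'changed' stays false, the loop exits.
     */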
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 4378a72..6b7e17b 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -78,11 +78,9 @@
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
if (cpu_has(c, X86_FEATURE_TSC)) {
- unsigned int freq = arch_freq_get_on_cpu(cpu);
+ unsigned int freq = cpufreq_quick_get(cpu);
if (!freq)
- freq = cpufreq_quick_get(cpu);
- if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
freq / 1000, (freq % 1000));
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e..6abd835 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@
*/
static void __init fpu__init_parse_early_param(void)
{
+ char arg[32];
+ char *argptr = arg;
+ int bit;
+
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);
@@ -266,6 +270,13 @@
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+ sizeof(arg)) &&
+ get_option(&argptr, &bit) &&
+ bit >= 0 &&
+ bit < NCAPINTS * 32)
+ setup_clear_cpu_cap(bit);
}
/*
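For reference, the boot parameter keeps its historical form: the bit number is word*32+bit as laid out in cpufeatures.h. For example, to disable XSAVE (word 4, bit 26, so 4*32 + 26):

    clearcpuid=154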
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476..87a57b7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>
#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@
"unknown xstate feature" ,
};
+static short xsave_cpuid_features[] __initdata = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -59,26 +73,6 @@
void fpu__xstate_clear_all_cpu_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
- setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
- setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
- setup_clear_cpu_cap(X86_FEATURE_PKU);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}
/*
@@ -726,6 +720,7 @@
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -759,6 +754,14 @@
goto out_disable;
}
+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
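The index into xsave_cpuid_features[] corresponds to the XSAVE state-component bit it gates, so the loop above behaves as sketched here:

    /* Example: a CPU without AVX512F. Entries 5..7 of the table all
     * name X86_FEATURE_AVX512F (opmask, ZMM_Hi256, Hi16_ZMM states),
     * so the loop strips bits 5..7 from the mask, equivalent to:
     *	xfeatures_mask &= ~(BIT(5) | BIT(6) | BIT(7));
     */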
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f1d528b..c290209 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -212,9 +212,6 @@
#endif
.Ldefault_entry:
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $(CR0_STATE & ~X86_CR0_PG),%eax
movl %eax,%cr0
@@ -402,7 +399,7 @@
# 24(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6dde3f3..7dca675 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,11 +38,12 @@
*
*/
-#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -50,6 +51,7 @@
.code64
.globl startup_64
startup_64:
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded an identity mapped page table
@@ -89,6 +91,7 @@
addq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded a mapped page table.
@@ -133,6 +136,7 @@
movq $1f, %rax
jmp *%rax
1:
+ UNWIND_HINT_EMPTY
/* Check if nx is implemented */
movl $0x80000001, %eax
@@ -150,9 +154,6 @@
1: wrmsr /* Make changes effective */
/* Setup cr0 */
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $CR0_STATE, %eax
/* Make changes effective */
movq %rax, %cr0
@@ -235,7 +236,7 @@
pushq %rax # target address in negative space
lretq
.Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)
#include "verify_cpu.S"
@@ -247,6 +248,7 @@
*/
ENTRY(start_cpu0)
movq initial_stack(%rip), %rsp
+ UNWIND_HINT_EMPTY
jmp .Ljump_to_C_code
ENDPROC(start_cpu0)
#endif
@@ -266,26 +268,24 @@
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA
-bad_address:
- jmp bad_address
-
__INIT
ENTRY(early_idt_handler_array)
- # 104(%rsp) %rflags
- # 96(%rsp) %cs
- # 88(%rsp) %rip
- # 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
- pushq $0 # Dummy error code, to make stack frame uniform
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+ UNWIND_HINT_IRET_REGS
+ pushq $0 # Dummy error code, to make stack frame uniform
+ .else
+ UNWIND_HINT_IRET_REGS offset=8
.endif
pushq $i # 72(%rsp) Vector number
jmp early_idt_handler_common
+ UNWIND_HINT_IRET_REGS
i = i + 1
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handler_array)
+ UNWIND_HINT_IRET_REGS offset=16
+END(early_idt_handler_array)
early_idt_handler_common:
/*
@@ -313,6 +313,7 @@
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
cmpq $14,%rsi /* Page fault? */
jnz 10f
@@ -327,8 +328,8 @@
20:
decl early_recursion_flag(%rip)
- jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+ jmp restore_regs_and_return_to_kernel
+END(early_idt_handler_common)
__INITDATA
@@ -362,10 +363,7 @@
.data
-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
- .fill 512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -382,6 +380,9 @@
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+ .fill 512,8,0
#endif
#ifdef CONFIG_X86_5LEVEL
@@ -435,7 +436,7 @@
EXPORT_SYMBOL(phys_base)
#include "../../x86/xen/xen-head.S"
-
+
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 6107ee1..014cb2f 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -92,8 +92,6 @@
INTG(X86_TRAP_DF, double_fault),
#endif
INTG(X86_TRAP_DB, debug),
- INTG(X86_TRAP_NMI, nmi),
- INTG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_MCE
INTG(X86_TRAP_MC, &machine_check),
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 4d17bac..ae5615b 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -13,6 +13,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -295,8 +296,8 @@
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
+ unsigned long, bytecount)
{
int ret = -ENOSYS;
@@ -314,5 +315,14 @@
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but the ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
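To see what the cast buys at the register level (a sketch, assuming a 64-bit kernel):

    /* ret == -ENOSYS (-38). Returned directly as 'long', %rax would be
     * 0xffffffffffffffda; with the (unsigned int) cast it becomes
     * 0x00000000ffffffda -- a 32-bit -38 with a clear high half, which
     * is what the old 'int' ABI of sys_modify_ldt() produced. */
    long retval = (unsigned int)-38;	/* 0xffffffda, not sign-extended */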
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c676853..97fb3e5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,13 @@
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
- .sp0 = TOP_OF_INIT_STACK,
+ /*
+ * .sp0 is only used when entering ring 0 from a lower
+ * privilege level. Since the init task never runs anything
+ * but ring 0 code, there is no need for a valid value here.
+ * Poison it.
+ */
+ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
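On 64-bit the poison value chosen above is additionally non-canonical, so any stray use of it faults immediately (a sketch of the arithmetic):

    unsigned long poison = (1UL << (BITS_PER_LONG - 1)) + 1;
    /* 64-bit: 0x8000000000000001. Bits 63..47 are not all equal, so the
     * address is non-canonical; any use of it as a stack pointer traps
     * instead of silently reusing stale memory. */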
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1196625..45bf0c5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@
/*
* Reload esp0 and cpu_current_top_of_stack. This changes
- * current_thread_info().
+ * current_thread_info(). Refresh the SYSENTER configuration in
+ * case prev or next is vm86.
*/
- load_sp0(tss, next);
+ update_sp0(next_p);
+ refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2..eeeb34f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@
struct inactive_task_frame *frame;
struct task_struct *me = current;
- p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
@@ -464,8 +463,8 @@
*/
this_cpu_write(current_task, next_p);
- /* Reload esp0 and ss1. This changes current_thread_info(). */
- load_sp0(tss, next);
+ /* Reload sp0. */
+ update_sp0(next_p);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd..d56c1d2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -194,6 +194,12 @@
smp_store_cpu_info(cpuid);
/*
+ * The topology information must be up to date before
+ * calibrate_delay() and notify_cpu_starting().
+ */
+ set_cpu_sibling_map(raw_smp_processor_id());
+
+ /*
* Get our bogomips.
* Update loops_per_jiffy in cpu_data. Previous call to
* smp_store_cpu_info() stored a value that is close but not as
@@ -203,11 +209,6 @@
cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
- /*
- * This must be done before setting cpu_online_mask
- * or calling notify_cpu_starting.
- */
- set_cpu_sibling_map(raw_smp_processor_id());
wmb();
notify_cpu_starting(cpuid);
@@ -961,8 +962,7 @@
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- per_cpu(cpu_current_top_of_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f4..d366adf 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -141,8 +141,7 @@
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer) >= THREAD_SIZE);
+ BUG_ON(!on_thread_stack());
preempt_enable_no_resched();
}
@@ -209,9 +208,6 @@
if (fixup_exception(regs, trapnr))
return 0;
- if (fixup_bug(regs, trapnr))
- return 0;
-
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -292,6 +288,13 @@
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+ /*
+ * WARN*()s end up here; fix them up before we call the
+ * notifier chain.
+ */
+ if (!user_mode(regs) && fixup_bug(regs, trapnr))
+ return;
+
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
cond_local_irq_enable(regs);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 796d96b..ad2b925 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1346,12 +1346,10 @@
unsigned long calibrate_delay_is_known(void)
{
int sibling, cpu = smp_processor_id();
- struct cpumask *mask = topology_core_cpumask(cpu);
+ int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
+ const struct cpumask *mask = topology_core_cpumask(cpu);
- if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- if (!mask)
+ if (tsc_disabled || !constant_tsc || !mask)
return 0;
sibling = cpumask_any_but(mask, cpu);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index b95007e..a3f973b 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -279,7 +279,7 @@
if (!stack_access_ok(state, addr, sizeof(long)))
return false;
- *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
+ *val = READ_ONCE_NOCHECK(*(unsigned long *)addr);
return true;
}
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59..3d3c2f7 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-verify_cpu:
+ENTRY(verify_cpu)
pushf # Save caller passed flags
push $0 # Kill any dangerous flags
popf
@@ -139,3 +139,4 @@
popf # Restore caller passed flags
xorl %eax, %eax
ret
+ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6824474..5edb27f 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -55,6 +55,7 @@
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>
/*
* Known problems:
@@ -94,7 +95,6 @@
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
@@ -146,12 +146,13 @@
do_exit(SIGSEGV);
}
- tss = &per_cpu(cpu_tss, get_cpu());
+ preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tss, &tsk->thread);
+ update_sp0(tsk);
+ refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
- put_cpu();
+ preempt_enable();
memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
@@ -237,7 +238,6 @@
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
@@ -365,15 +365,17 @@
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);
- tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
+ preempt_disable();
tsk->thread.sp0 += 16;
- if (static_cpu_has(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
+ refresh_sysenter_cs(&tsk->thread);
+ }
- load_sp0(tss, &tsk->thread);
- put_cpu();
+ update_sp0(tsk);
+ preempt_enable();
if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b0ff3786..3109ba6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,26 +30,6 @@
#include <asm/trace/exceptions.h>
/*
- * Page fault error code bits:
- *
- * bit 0 == 0: no page found 1: protection fault
- * bit 1 == 0: read access 1: write access
- * bit 2 == 0: kernel-mode access 1: user-mode access
- * bit 3 == 1: use of reserved bit detected
- * bit 4 == 1: fault was an instruction fetch
- * bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
-
- PF_PROT = 1 << 0,
- PF_WRITE = 1 << 1,
- PF_USER = 1 << 2,
- PF_RSVD = 1 << 3,
- PF_INSTR = 1 << 4,
- PF_PK = 1 << 5,
-};
-
-/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
*/
@@ -150,7 +130,7 @@
* If it was a exec (instruction fetch) fault on NX page, then
* do not ignore the fault:
*/
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
return 0;
instr = (void *)convert_ip_to_linear(current, regs);
@@ -180,7 +160,7 @@
* siginfo so userspace can discover which protection key was set
* on the PTE.
*
- * If we get here, we know that the hardware signaled a PF_PK
+ * If we get here, we know that the hardware signaled a X86_PF_PK
* fault and that there was a VMA once we got in the fault
* handler. It does *not* guarantee that the VMA we find here
* was the one that we faulted on.
@@ -205,7 +185,7 @@
/*
* force_sig_info_fault() is called from a number of
* contexts, some of which have a VMA and some of which
- * do not. The PF_PK handing happens after we have a
+ * do not. The X86_PF_PK handing happens after we have a
* valid VMA, so we should never reach this without a
* valid VMA.
*/
@@ -698,7 +678,7 @@
if (!oops_may_print())
return;
- if (error_code & PF_INSTR) {
+ if (error_code & X86_PF_INSTR) {
unsigned int level;
pgd_t *pgd;
pte_t *pte;
@@ -780,7 +760,7 @@
*/
if (current->thread.sig_on_uaccess_err && signal) {
tsk->thread.trap_nr = X86_TRAP_PF;
- tsk->thread.error_code = error_code | PF_USER;
+ tsk->thread.error_code = error_code | X86_PF_USER;
tsk->thread.cr2 = address;
/* XXX: hwpoison faults will set the wrong code. */
@@ -898,7 +878,7 @@
struct task_struct *tsk = current;
/* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* It's possible to have interrupts off here:
*/
@@ -919,7 +899,7 @@
* Instruction fetch faults in the vsyscall page might need
* emulation.
*/
- if (unlikely((error_code & PF_INSTR) &&
+ if (unlikely((error_code & X86_PF_INSTR) &&
((address & ~0xfff) == VSYSCALL_ADDR))) {
if (emulate_vsyscall(regs, address))
return;
@@ -932,7 +912,7 @@
* are always protection faults.
*/
if (address >= TASK_SIZE_MAX)
- error_code |= PF_PROT;
+ error_code |= X86_PF_PROT;
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -993,11 +973,11 @@
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return false;
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return true;
/* this checks permission keys on the VMA: */
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return true;
return false;
}
@@ -1025,7 +1005,7 @@
int code = BUS_ADRERR;
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
return;
}
@@ -1053,14 +1033,14 @@
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 *pkey, unsigned int fault)
{
- if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+ if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, 0, 0);
return;
}
if (fault & VM_FAULT_OOM) {
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address,
SIGSEGV, SEGV_MAPERR);
return;
@@ -1085,16 +1065,16 @@
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
- if ((error_code & PF_WRITE) && !pte_write(*pte))
+ if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
return 0;
/*
* Note: We do not do lazy flushing on protection key
- * changes, so no spurious fault will ever set PF_PK.
+ * changes, so no spurious fault will ever set X86_PF_PK.
*/
- if ((error_code & PF_PK))
+ if ((error_code & X86_PF_PK))
return 1;
return 1;
@@ -1140,8 +1120,8 @@
* change, so user accesses are not expected to cause spurious
* faults.
*/
- if (error_code != (PF_WRITE | PF_PROT)
- && error_code != (PF_INSTR | PF_PROT))
+ if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+ error_code != (X86_PF_INSTR | X86_PF_PROT))
return 0;
pgd = init_mm.pgd + pgd_index(address);
@@ -1201,19 +1181,19 @@
* always an unconditional error and can never result in
* a follow-up action to resolve the fault, like a COW.
*/
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return 1;
/*
* Make sure to check the VMA so that we do not perform
- * faults just to hit a PF_PK as soon as we fill in a
+ * faults just to hit a X86_PF_PK as soon as we fill in a
* page.
*/
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return 1;
- if (error_code & PF_WRITE) {
+ if (error_code & X86_PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -1221,7 +1201,7 @@
}
/* read, present: */
- if (unlikely(error_code & PF_PROT))
+ if (unlikely(error_code & X86_PF_PROT))
return 1;
/* read, not present: */
@@ -1244,7 +1224,7 @@
if (!static_cpu_has(X86_FEATURE_SMAP))
return false;
- if (error_code & PF_USER)
+ if (error_code & X86_PF_USER)
return false;
if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1297,7 +1277,7 @@
* protection error (error_code & 9) == 0.
*/
if (unlikely(fault_in_kernel_space(address))) {
- if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+ if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
if (vmalloc_fault(address) >= 0)
return;
@@ -1325,7 +1305,7 @@
if (unlikely(kprobes_fault(regs)))
return;
- if (unlikely(error_code & PF_RSVD))
+ if (unlikely(error_code & X86_PF_RSVD))
pgtable_bad(regs, error_code, address);
if (unlikely(smap_violation(error_code, regs))) {
@@ -1351,7 +1331,7 @@
*/
if (user_mode(regs)) {
local_irq_enable();
- error_code |= PF_USER;
+ error_code |= X86_PF_USER;
flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
@@ -1360,9 +1340,9 @@
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (error_code & PF_WRITE)
+ if (error_code & X86_PF_WRITE)
flags |= FAULT_FLAG_WRITE;
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
/*
@@ -1382,7 +1362,7 @@
* space check, thus avoiding the deadlock:
*/
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
- if ((error_code & PF_USER) == 0 &&
+ if (!(error_code & X86_PF_USER) &&
!search_exception_tables(regs->ip)) {
bad_area_nosemaphore(regs, error_code, address, NULL);
return;
@@ -1409,7 +1389,7 @@
bad_area(regs, error_code, address);
return;
}
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 048fbe8..adcea90 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1426,16 +1426,16 @@
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
void register_page_bootmem_memmap(unsigned long section_nr,
- struct page *start_page, unsigned long size)
+ struct page *start_page, unsigned long nr_pages)
{
unsigned long addr = (unsigned long)start_page;
- unsigned long end = (unsigned long)(start_page + size);
+ unsigned long end = (unsigned long)(start_page + nr_pages);
unsigned long next;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- unsigned int nr_pages;
+ unsigned int nr_pmd_pages;
struct page *page;
for (; addr < end; addr = next) {
@@ -1482,9 +1482,9 @@
if (pmd_none(*pmd))
continue;
- nr_pages = 1 << (get_order(PMD_SIZE));
+ nr_pmd_pages = 1 << get_order(PMD_SIZE);
page = pmd_page(*pmd);
- while (nr_pages--)
+ while (nr_pmd_pages--)
get_page_bootmem(section_nr, page++,
SECTION_INFO);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8f5be3e..2b60dc6 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -16,6 +16,8 @@
extern struct range pfn_mapped[E820_MAX_ENTRIES];
+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
static int __init map_range(struct range *range)
{
unsigned long start;
@@ -31,8 +33,10 @@
unsigned long end)
{
pgd_t *pgd;
+ /* See comment in kasan_init() */
+ unsigned long pgd_end = end & PGDIR_MASK;
- for (; start < end; start += PGDIR_SIZE) {
+ for (; start < pgd_end; start += PGDIR_SIZE) {
pgd = pgd_offset_k(start);
/*
* With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -43,29 +47,61 @@
else
pgd_clear(pgd);
}
+
+ pgd = pgd_offset_k(start);
+ for (; start < end; start += P4D_SIZE)
+ p4d_clear(p4d_offset(pgd, start));
+}
+
+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+ unsigned long p4d;
+
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return (p4d_t *)pgd;
+
+ p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+ p4d += __START_KERNEL_map - phys_base;
+ return (p4d_t *)p4d + p4d_index(addr);
+}
+
+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pgd_t pgd_entry;
+ p4d_t *p4d, p4d_entry;
+ unsigned long next;
+
+ if (pgd_none(*pgd)) {
+ pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+ set_pgd(pgd, pgd_entry);
+ }
+
+ p4d = early_p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_none(*p4d))
+ continue;
+
+ p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+ set_p4d(p4d, p4d_entry);
+ } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
}
static void __init kasan_map_early_shadow(pgd_t *pgd)
{
- int i;
- unsigned long start = KASAN_SHADOW_START;
+ /* See comment in kasan_init() */
+ unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
- for (i = pgd_index(start); start < end; i++) {
- switch (CONFIG_PGTABLE_LEVELS) {
- case 4:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
- _KERNPG_TABLE);
- break;
- case 5:
- pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
- _KERNPG_TABLE);
- break;
- default:
- BUILD_BUG();
- }
- start += PGDIR_SIZE;
- }
+ pgd += pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_p4d_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
}
#ifdef CONFIG_KASAN_INLINE
@@ -102,7 +138,7 @@
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+ for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -118,12 +154,35 @@
#endif
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+
+ /*
+ * We use the same shadow offset for 4- and 5-level paging to
+ * facilitate boot-time switching between paging modes.
+ * As a result, in 5-level paging mode KASAN_SHADOW_START and
+ * KASAN_SHADOW_END are not aligned to a PGD boundary.
+ *
+ * KASAN_SHADOW_START doesn't share a PGD with anything else.
+ * We claim the whole PGD entry to make things easier.
+ *
+ * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+ * a bunch of things like kernel code, modules, the EFI mapping, etc.
+ * We need to take extra steps to not overwrite them.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ void *ptr;
+
+ ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+ set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+ __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+ }
+
load_cr3(early_top_pgt);
__flush_tlb_all();
- clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
- kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
kasan_mem_to_shadow((void *)PAGE_OFFSET));
for (i = 0; i < E820_MAX_ENTRIES; i++) {
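The PGDIR_MASK rounding used above can be sketched numerically (illustrative; the exact constants depend on the configured shadow offset):

    /* The shadow of an address is offset + (addr >> 3), so with a shared
     * offset KASAN_SHADOW_START need not fall on a PGD boundary under
     * 5-level paging. Rounding down claims the whole PGD entry, matching
     * the comment in kasan_init(): */
    unsigned long start = KASAN_SHADOW_START & PGDIR_MASK;	/* round down */
    unsigned long end   = KASAN_SHADOW_END;	/* covered via tmp_p4d_table */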
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 350f709..7913b69 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -212,8 +212,8 @@
eax.full = cpuid_eax(0xa);
/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
- if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 &&
- __this_cpu_read(cpu_info.x86_model) == 15) {
+ if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
eax.split.num_counters = 2;
eax.split.bit_width = 40;
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb..3ee234b6 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@
mm->arch.ldt.entry_count = 0;
}
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
+ unsigned long, bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 168efb2..fbd054d 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -601,7 +601,7 @@
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
- { nmi, xen_nmi, true },
+ { nmi, xen_xennmi, true },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -811,15 +811,14 @@
}
}
-static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
{
struct multicall_space mcs;
mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
void xen_set_iopl_mask(unsigned mask)
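For contrast with the Xen path above, the native side of the new single-argument interface reduces to one per-CPU store; a sketch under the cpu_tss layout used in this series:

    /* Sketch: native load_sp0() counterpart of xen_load_sp0() above. */
    static inline void native_load_sp0_sketch(unsigned long sp0)
    {
            this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
    }

Dropping the tss/thread pointer pair narrows the paravirt hook to the only value it actually needed.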
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 71495f1..2ccdaba 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -449,7 +449,7 @@
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
__visible pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
@@ -538,7 +538,7 @@
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@
int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
bool last, unsigned long limit)
{
- int i, nr, flush = 0;
+ int flush = 0;
+ pud_t *pud;
- nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
- for (i = 0; i < nr; i++) {
- pud_t *pud;
- if (p4d_none(p4d[i]))
- continue;
+ if (p4d_none(*p4d))
+ return flush;
- pud = pud_offset(&p4d[i], 0);
- if (PTRS_PER_PUD > 1)
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
- flush |= xen_pud_walk(mm, pud, func,
- last && i == nr - 1, limit);
- }
+ pud = pud_offset(p4d, 0);
+ if (PTRS_PER_PUD > 1)
+ flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+ flush |= xen_pud_walk(mm, pud, func, last, limit);
return flush;
}
@@ -644,8 +640,6 @@
continue;
p4d = p4d_offset(&pgd[i], 0);
- if (PTRS_PER_P4D > 1)
- flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}
@@ -1176,22 +1170,14 @@
{
pgd_t *pgd;
p4d_t *p4d;
- unsigned int i;
bool unpin;
unpin = (vaddr == 2 * PGDIR_SIZE);
vaddr &= PMD_MASK;
pgd = pgd_offset_k(vaddr);
p4d = p4d_offset(pgd, 0);
- for (i = 0; i < PTRS_PER_P4D; i++) {
- if (p4d_none(p4d[i]))
- continue;
- xen_cleanmfnmap_p4d(p4d + i, unpin);
- }
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- set_pgd(pgd, __pgd(0));
- xen_cleanmfnmap_free_pgtbl(p4d, unpin);
- }
+ if (!p4d_none(*p4d))
+ xen_cleanmfnmap_p4d(p4d, unpin);
}
static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@
xen_release_ptpage(pfn, PT_PMD);
}
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2029,13 +2015,12 @@
*/
void __init xen_relocate_p2m(void)
{
- phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+ phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
- int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+ int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
pte_t *pt;
pmd_t *pmd;
pud_t *pud;
- p4d_t *p4d = NULL;
pgd_t *pgd;
unsigned long *new_p2m;
int save_pud;
@@ -2045,11 +2030,7 @@
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
- if (PTRS_PER_P4D > 1)
- n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
- else
- n_p4d = 0;
- n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+ n_frames = n_pte + n_pt + n_pmd + n_pud;
new_area = xen_find_free_area(PFN_PHYS(n_frames));
if (!new_area) {
@@ -2065,76 +2046,56 @@
* To avoid any possible virtual address collision, just use
* 2 * PUD_SIZE for the new area.
*/
- p4d_phys = new_area;
- pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+ pud_phys = new_area;
pmd_phys = pud_phys + PFN_PHYS(n_pud);
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
- idx_p4d = 0;
save_pud = n_pud;
- do {
- if (n_p4d > 0) {
- p4d = early_memremap(p4d_phys, PAGE_SIZE);
- clear_page(p4d);
- n_pud = min(save_pud, PTRS_PER_P4D);
- }
- for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
- pud = early_memremap(pud_phys, PAGE_SIZE);
- clear_page(pud);
- for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
- idx_pmd++) {
- pmd = early_memremap(pmd_phys, PAGE_SIZE);
- clear_page(pmd);
- for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
- idx_pt++) {
- pt = early_memremap(pt_phys, PAGE_SIZE);
- clear_page(pt);
- for (idx_pte = 0;
- idx_pte < min(n_pte, PTRS_PER_PTE);
- idx_pte++) {
- set_pte(pt + idx_pte,
- pfn_pte(p2m_pfn, PAGE_KERNEL));
- p2m_pfn++;
- }
- n_pte -= PTRS_PER_PTE;
- early_memunmap(pt, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pt_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
- PFN_DOWN(pt_phys));
- set_pmd(pmd + idx_pt,
- __pmd(_PAGE_TABLE | pt_phys));
- pt_phys += PAGE_SIZE;
+ for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+ pud = early_memremap(pud_phys, PAGE_SIZE);
+ clear_page(pud);
+ for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+ idx_pmd++) {
+ pmd = early_memremap(pmd_phys, PAGE_SIZE);
+ clear_page(pmd);
+ for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+ idx_pt++) {
+ pt = early_memremap(pt_phys, PAGE_SIZE);
+ clear_page(pt);
+ for (idx_pte = 0;
+ idx_pte < min(n_pte, PTRS_PER_PTE);
+ idx_pte++) {
+ set_pte(pt + idx_pte,
+ pfn_pte(p2m_pfn, PAGE_KERNEL));
+ p2m_pfn++;
}
- n_pt -= PTRS_PER_PMD;
- early_memunmap(pmd, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pmd_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
- PFN_DOWN(pmd_phys));
- set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
- pmd_phys += PAGE_SIZE;
+ n_pte -= PTRS_PER_PTE;
+ early_memunmap(pt, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pt_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+ PFN_DOWN(pt_phys));
+ set_pmd(pmd + idx_pt,
+ __pmd(_PAGE_TABLE | pt_phys));
+ pt_phys += PAGE_SIZE;
}
- n_pmd -= PTRS_PER_PUD;
- early_memunmap(pud, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pud_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
- if (n_p4d > 0)
- set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
- else
- set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
- pud_phys += PAGE_SIZE;
+ n_pt -= PTRS_PER_PMD;
+ early_memunmap(pmd, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pmd_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+ PFN_DOWN(pmd_phys));
+ set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+ pmd_phys += PAGE_SIZE;
}
- if (n_p4d > 0) {
- save_pud -= PTRS_PER_P4D;
- early_memunmap(p4d, PAGE_SIZE);
- make_lowmem_page_readonly(__va(p4d_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
- set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
- p4d_phys += PAGE_SIZE;
- }
- } while (++idx_p4d < n_p4d);
+ n_pmd -= PTRS_PER_PUD;
+ early_memunmap(pud, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pud_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+ set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+ pud_phys += PAGE_SIZE;
+ }
/* Now copy the old p2m info to the new area. */
memcpy(new_p2m, xen_p2m_addr, size);
@@ -2361,7 +2322,7 @@
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.set_p4d = xen_set_p4d;
#endif
@@ -2371,7 +2332,7 @@
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte;
pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
pv_mmu_ops.alloc_pud = xen_alloc_pud;
pv_mmu_ops.release_pud = xen_release_pud;
#endif
@@ -2435,14 +2396,14 @@
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_p4d = xen_set_p4d_hyper,
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+#endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 05f91ce..c0c756c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -14,6 +14,7 @@
* single-threaded.
*/
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -294,12 +295,19 @@
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ /*
+ * Bring up the CPU in cpu_bringup_and_idle() with the stack
+ * pointing just below where pt_regs would be if it were a normal
+ * kernel entry.
+ */
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
xen_copy_trap_info(ctxt->trap_ctxt);
@@ -314,8 +322,13 @@
ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;
+ /*
+ * Set SS:SP that Xen will use when entering guest kernel mode
+ * from guest user mode. Subsequent calls to load_sp0() can
+ * change this value.
+ */
ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_sp = task_top_of_stack(idle);
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
@@ -327,10 +340,8 @@
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
- ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c98a48c..8a10c9a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -30,7 +30,7 @@
xen_pv_trap xendebug
xen_pv_trap int3
xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
xen_pv_trap invalid_op
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b5b8d7f..497cc55 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -10,6 +10,7 @@
#include <asm/boot.h>
#include <asm/asm.h>
#include <asm/page_types.h>
+#include <asm/unwind_hints.h>
#include <xen/interface/elfnote.h>
#include <xen/interface/features.h>
@@ -20,6 +21,7 @@
#ifdef CONFIG_XEN_PV
__INIT
ENTRY(startup_xen)
+ UNWIND_HINT_EMPTY
cld
/* Clear .bss */
@@ -34,21 +36,24 @@
mov $init_thread_union+THREAD_SIZE, %_ASM_SP
jmp xen_start_kernel
-
+END(startup_xen)
__FINIT
#endif
.pushsection .text
.balign PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_EMPTY
+ .skip 32
+ .endr
#define HYPERCALL(n) \
.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
#include <asm/xen-hypercalls.h>
#undef HYPERCALL
-
+END(hypercall_page)
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3c3a37b..572b6c7 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -51,6 +51,7 @@
#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
+#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>
@@ -112,7 +113,7 @@
* Because the memory area used to transfer hardware error information
* from BIOS to Linux can be determined only in NMI, IRQ or timer
* handler, but general ioremap can not be used in atomic context, so
- * a special version of atomic ioremap is implemented for that.
+ * the fixmap is used instead.
*/
/*
@@ -126,8 +127,8 @@
/* virtual memory area for atomic ioremap */
static struct vm_struct *ghes_ioremap_area;
/*
- * These 2 spinlock is used to prevent atomic ioremap virtual memory
- * area from being mapped simultaneously.
+ * These 2 spinlocks are used to prevent the fixmap entries from being used
+ * simultaneously.
*/
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
@@ -159,52 +160,36 @@
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
- unsigned long vaddr;
phys_addr_t paddr;
pgprot_t prot;
- vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
-
paddr = pfn << PAGE_SHIFT;
prot = arch_apei_get_mem_attribute(paddr);
- ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
+ __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
- return (void __iomem *)vaddr;
+ return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
}
static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
- unsigned long vaddr, paddr;
+ phys_addr_t paddr;
pgprot_t prot;
- vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
-
paddr = pfn << PAGE_SHIFT;
prot = arch_apei_get_mem_attribute(paddr);
+ __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
- ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
-
- return (void __iomem *)vaddr;
+ return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
}
-static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
+static void ghes_iounmap_nmi(void)
{
- unsigned long vaddr = (unsigned long __force)vaddr_ptr;
- void *base = ghes_ioremap_area->addr;
-
- BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
- unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
- arch_apei_flush_tlb_one(vaddr);
+ clear_fixmap(FIX_APEI_GHES_NMI);
}
-static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
+static void ghes_iounmap_irq(void)
{
- unsigned long vaddr = (unsigned long __force)vaddr_ptr;
- void *base = ghes_ioremap_area->addr;
-
- BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
- unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
- arch_apei_flush_tlb_one(vaddr);
+ clear_fixmap(FIX_APEI_GHES_IRQ);
}
static int ghes_estatus_pool_init(void)
@@ -360,10 +345,10 @@
paddr += trunk;
buffer += trunk;
if (in_nmi) {
- ghes_iounmap_nmi(vaddr);
+ ghes_iounmap_nmi();
raw_spin_unlock(&ghes_ioremap_lock_nmi);
} else {
- ghes_iounmap_irq(vaddr);
+ ghes_iounmap_irq();
spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
}
}
@@ -851,17 +836,8 @@
synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
-static inline void ghes_sea_add(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
- ghes->generic->header.source_id);
-}
-
-static inline void ghes_sea_remove(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
- ghes->generic->header.source_id);
-}
+static inline void ghes_sea_add(struct ghes *ghes) { }
+static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -1063,23 +1039,9 @@
init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
-static inline void ghes_nmi_add(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
- ghes->generic->header.source_id);
- BUG();
-}
-
-static inline void ghes_nmi_remove(struct ghes *ghes)
-{
- pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
- ghes->generic->header.source_id);
- BUG();
-}
-
-static inline void ghes_nmi_init_cxt(void)
-{
-}
+static inline void ghes_nmi_add(struct ghes *ghes) { }
+static inline void ghes_nmi_remove(struct ghes *ghes) { }
+static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
static int ghes_probe(struct platform_device *ghes_dev)
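The net effect of the ghes.c conversion is that each atomic context owns one fixmap slot, serialized by its lock; the read path condenses to the following (error handling and the surrounding ghes_copy_tofrom_phys() loop omitted, NMI side shown):

    /* Sketch of the per-context fixmap pattern introduced above. */
    raw_spin_lock(&ghes_ioremap_lock_nmi);
    vaddr = ghes_ioremap_pfn_nmi(pfn);   /* __set_fixmap() + fix_to_virt() */
    memcpy_fromio(buffer, vaddr + offset, trunk);
    ghes_iounmap_nmi();                  /* clear_fixmap() */
    raw_spin_unlock(&ghes_ioremap_lock_nmi);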
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b640ad8..adc877d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2692,7 +2692,7 @@
* from the parent.
*/
page_count = (u32)calc_pages_for(0, length);
- pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
if (IS_ERR(pages)) {
result = PTR_ERR(pages);
pages = NULL;
@@ -2827,7 +2827,7 @@
*/
size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
page_count = (u32)calc_pages_for(0, size);
- pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto fail_stat_request;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4ac454a..83876a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2094,6 +2094,11 @@
goto err;
}
+ if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+ err = -EINVAL;
+ goto err;
+ }
+
syncobj = drm_syncobj_find(file, fence.handle);
if (!syncobj) {
DRM_DEBUG("Invalid syncobj handle provided\n");
@@ -2101,6 +2106,9 @@
goto err;
}
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e2410eb..ad524cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -832,10 +832,14 @@
}
}
-struct sgt_dma {
+static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
-};
+} sgt_dma(struct i915_vma *vma) {
+ struct scatterlist *sg = vma->pages->sgl;
+ dma_addr_t addr = sg_dma_address(sg);
+ return (struct sgt_dma) { sg, addr, addr + sg->length };
+}
struct gen8_insert_pte {
u16 pml4e;
@@ -916,11 +920,7 @@
u32 unused)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct sgt_dma iter = {
- .sg = vma->pages->sgl,
- .dma = sg_dma_address(iter.sg),
- .max = iter.dma + iter.sg->length,
- };
+ struct sgt_dma iter = sgt_dma(vma);
struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
@@ -933,11 +933,7 @@
u32 unused)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct sgt_dma iter = {
- .sg = vma->pages->sgl,
- .dma = sg_dma_address(iter.sg),
- .max = iter.dma + iter.sg->length,
- };
+ struct sgt_dma iter = sgt_dma(vma);
struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
@@ -1632,13 +1628,10 @@
unsigned act_pt = first_entry / GEN6_PTES;
unsigned act_pte = first_entry % GEN6_PTES;
const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
- struct sgt_dma iter;
+ struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
- iter.sg = vma->pages->sgl;
- iter.dma = sg_dma_address(iter.sg);
- iter.max = iter.dma + iter.sg->length;
do {
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
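The replaced initializers read 'iter' inside its own initializer list, which relies on evaluation ordering that C does not guarantee; the sgt_dma() helper sidesteps that. A reduced illustration of the hazard being removed:

    /* The old pattern: self-referencing designated initializers. */
    struct sgt_dma iter = {
            .sg  = vma->pages->sgl,
            .dma = sg_dma_address(iter.sg),   /* reads iter.sg mid-init */
            .max = iter.dma + iter.sg->length,
    };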
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index e84fee3..184340d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -721,7 +721,7 @@
* allocation taken by fbdev
*/
if (!(dev_priv->capabilities & SVGA_CAP_3D))
- mem_size *= 2;
+ mem_size *= 3;
dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
dev_priv->prim_bb_mem =
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 3bbad22..d6b1c50 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -224,7 +224,7 @@
return ret;
}
-static struct dma_fence_ops vmw_fence_ops = {
+static const struct dma_fence_ops vmw_fence_ops = {
.get_driver_name = vmw_fence_get_driver_name,
.get_timeline_name = vmw_fence_get_timeline_name,
.enable_signaling = vmw_fence_enable_signaling,
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 6d6b092..d613590 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1258,6 +1258,7 @@
{ "ELAN0605", 0 },
{ "ELAN0609", 0 },
{ "ELAN060B", 0 },
+ { "ELAN060C", 0 },
{ "ELAN0611", 0 },
{ "ELAN1000", 0 },
{ }
diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c
index 225025a..b6ccf39c 100644
--- a/drivers/input/rmi4/rmi_smbus.c
+++ b/drivers/input/rmi4/rmi_smbus.c
@@ -312,7 +312,7 @@
rmi_smb->xport.dev = &client->dev;
rmi_smb->xport.pdata = *pdata;
rmi_smb->xport.pdata.irq = client->irq;
- rmi_smb->xport.proto_name = "smb2";
+ rmi_smb->xport.proto_name = "smb";
rmi_smb->xport.ops = &rmi_smb_ops;
smbus_version = rmi_smb_get_version(rmi_smb);
@@ -322,7 +322,7 @@
rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Smbus version is %d",
smbus_version);
- if (smbus_version != 2) {
+ if (smbus_version != 2 && smbus_version != 3) {
dev_err(&client->dev, "Unrecognized SMB version %d\n",
smbus_version);
return -ENODEV;
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index 88ea5e1..abf2757 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -531,6 +531,7 @@
input_set_drvdata(input_dev, ts);
+ __set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
input_set_capability(input_dev, EV_KEY, BTN_TOUCH);
input_set_abs_params(input_dev, ABS_X,
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index eda38cb..41f2a9f 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -32,7 +32,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/miscdevice.h>
-#include <linux/pti.h>
+#include <linux/intel-pti.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index cf7c189..d065c0e 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -178,7 +178,6 @@
break;
case BOSCH_D_CAN:
priv->regs = reg_map_d_can;
- priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
break;
default:
ret = -EINVAL;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 46a746e..b5145a7 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -320,7 +320,6 @@
break;
case BOSCH_D_CAN:
priv->regs = reg_map_d_can;
- priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
priv->read_reg = c_can_plat_read_reg_aligned_to_16bit;
priv->write_reg = c_can_plat_write_reg_aligned_to_16bit;
priv->read_reg32 = d_can_plat_read_reg32;
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 4d1fe8d..2772d05 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -670,9 +670,9 @@
priv->base + IFI_CANFD_FTIME);
/* Configure transmitter delay */
- tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK;
- writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc,
- priv->base + IFI_CANFD_TDELAY);
+ tdc = dbt->brp * (dbt->prop_seg + dbt->phase_seg1);
+ tdc &= IFI_CANFD_TDELAY_MASK;
+ writel(IFI_CANFD_TDELAY_EN | tdc, priv->base + IFI_CANFD_TDELAY);
}
static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 51c2d18..b4efd71 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -29,14 +29,19 @@
#include "peak_canfd_user.h"
MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>");
-MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe FD family cards");
-MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe FD CAN cards");
+MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards");
+MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe/M.2 FD CAN cards");
MODULE_LICENSE("GPL v2");
#define PCIEFD_DRV_NAME "peak_pciefd"
#define PEAK_PCI_VENDOR_ID 0x001c /* The PCI device and vendor IDs */
#define PEAK_PCIEFD_ID 0x0013 /* for PCIe slot cards */
+#define PCAN_CPCIEFD_ID 0x0014 /* for Compact-PCI Serial slot cards */
+#define PCAN_PCIE104FD_ID 0x0017 /* for PCIe-104 Express slot cards */
+#define PCAN_MINIPCIEFD_ID 0x0018 /* for mini-PCIe slot cards */
+#define PCAN_PCIEFD_OEM_ID 0x0019 /* for PCIe slot OEM cards */
+#define PCAN_M2_ID 0x001a /* for M2 slot cards */
/* PEAK PCIe board access description */
#define PCIEFD_BAR0_SIZE (64 * 1024)
@@ -203,6 +208,11 @@
/* supported device ids. */
static const struct pci_device_id peak_pciefd_tbl[] = {
{PEAK_PCI_VENDOR_ID, PEAK_PCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {PEAK_PCI_VENDOR_ID, PCAN_CPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {PEAK_PCI_VENDOR_ID, PCAN_PCIE104FD_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {PEAK_PCI_VENDOR_ID, PCAN_MINIPCIEFD_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {PEAK_PCI_VENDOR_ID, PCAN_PCIEFD_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,},
+ {PEAK_PCI_VENDOR_ID, PCAN_M2_ID, PCI_ANY_ID, PCI_ANY_ID,},
{0,}
};
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index b0c8085..1ac2090 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -539,6 +539,13 @@
}
stats->rx_over_errors++;
stats->rx_errors++;
+
+ /* reset the CAN IP by entering reset mode
+	 * ignoring any timeout error
+ */
+ set_reset_mode(dev);
+ set_normal_mode(dev);
+
/* clear bit */
sun4i_can_write_cmdreg(priv, SUN4I_CMD_CLEAR_OR_FLAG);
}
@@ -653,8 +660,9 @@
netif_wake_queue(dev);
can_led_event(dev, CAN_LED_EVENT_TX);
}
- if (isrc & SUN4I_INT_RBUF_VLD) {
- /* receive interrupt */
+ if ((isrc & SUN4I_INT_RBUF_VLD) &&
+ !(isrc & SUN4I_INT_DATA_OR)) {
+ /* receive interrupt - don't read if overrun occurred */
while (status & SUN4I_STA_RBUF_RDY) {
/* RX buffer is not empty */
sun4i_can_rx(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index fc28171..17b7232 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -93,7 +93,7 @@
list_splice_init(&priv->waiting_events_list, &temp);
if (!dev_ctx->context)
goto out;
- list_for_each_entry_safe(de, n, &priv->waiting_events_list, list)
+ list_for_each_entry_safe(de, n, &temp, list)
dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cc13d3d..13b5ef9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -67,7 +67,7 @@
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
-#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 15a1687..91b1b09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -215,22 +215,20 @@
static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
- struct page *page;
-
if (mlx5e_rx_cache_get(rq, dma_info))
return 0;
- page = dev_alloc_pages(rq->buff.page_order);
- if (unlikely(!page))
+ dma_info->page = dev_alloc_pages(rq->buff.page_order);
+ if (unlikely(!dma_info->page))
return -ENOMEM;
- dma_info->addr = dma_map_page(rq->pdev, page, 0,
+ dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
RQ_PAGE_SIZE(rq), rq->buff.map_dir);
if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
- put_page(page);
+ put_page(dma_info->page);
+ dma_info->page = NULL;
return -ENOMEM;
}
- dma_info->page = page;
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index e906b75..ab92298 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -49,7 +49,7 @@
struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
napi);
bool busy = false;
- int work_done;
+ int work_done = 0;
int i;
for (i = 0; i < c->num_tc; i++)
@@ -58,15 +58,17 @@
if (c->xdp)
busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
- work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
- busy |= work_done == budget;
+ if (likely(budget)) { /* budget=0 means: don't poll rx rings */
+ work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ busy |= work_done == budget;
+ }
busy |= c->rq.post_wqes(&c->rq);
if (busy) {
if (likely(mlx5e_channel_no_affinity_change(c)))
return budget;
- if (work_done == budget)
+ if (budget && work_done == budget)
work_done--;
}
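budget == 0 is the netpoll convention: the caller wants TX completions drained without touching RX, and the poll routine must then also avoid returning a budget-equal value that would keep NAPI rescheduled. A reduced sketch of the contract (hypothetical helper names, not mlx5 code):

    static int napi_poll_sketch(struct napi_struct *napi, int budget)
    {
            int work_done = 0;

            sketch_clean_tx();                  /* always permitted */
            if (budget)                         /* budget==0: skip RX */
                    work_done = sketch_poll_rx(budget);
            if (work_done < budget)
                    napi_complete_done(napi, work_done);
            return work_done;
    }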
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0d2c8dc..06562c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1482,9 +1482,16 @@
return -EAGAIN;
}
+	/* The panic teardown FW command will stop the PCI bus communication
+	 * with the HCA, so the health poll is no longer needed.
+ */
+ mlx5_drain_health_wq(dev);
+ mlx5_stop_health_poll(dev);
+
ret = mlx5_cmd_force_teardown_hca(dev);
if (ret) {
mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+ mlx5_start_health_poll(dev);
return ret;
}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8acfc1e..63e56f6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -687,7 +687,7 @@
#define BUG_TABLE
#endif
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
#define ORC_UNWIND_TABLE \
. = ALIGN(4); \
.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
diff --git a/include/linux/pti.h b/include/linux/intel-pti.h
similarity index 94%
rename from include/linux/pti.h
rename to include/linux/intel-pti.h
index b3ea01a..2710d72 100644
--- a/include/linux/pti.h
+++ b/include/linux/intel-pti.h
@@ -22,8 +22,8 @@
* interface to write out its contents for debugging a mobile system.
*/
-#ifndef PTI_H_
-#define PTI_H_
+#ifndef LINUX_INTEL_PTI_H_
+#define LINUX_INTEL_PTI_H_
/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
#define PTI_LASTDWORD_DTS 0x30
@@ -40,4 +40,4 @@
const char *thread_name);
void pti_release_masterchannel(struct pti_masterchannel *mc);
-#endif /*PTI_H_*/
+#endif /* LINUX_INTEL_PTI_H_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43edf65..91b46f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2496,7 +2496,7 @@
void vmemmap_free(unsigned long start, unsigned long end);
#endif
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
- unsigned long size);
+ unsigned long nr_pages);
enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c9c4a81..a507f43 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1151,13 +1151,17 @@
#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
#ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section)
+ return NULL;
+#endif
if (!mem_section[SECTION_NR_TO_ROOT(nr)])
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6598fb7..9816590 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -829,6 +829,7 @@
#define I915_EXEC_FENCE_WAIT (1<<0)
#define I915_EXEC_FENCE_SIGNAL (1<<1)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
__u32 flags;
};
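__I915_EXEC_FENCE_UNKNOWN_FLAGS builds an "all bits above the known flags" mask via two's-complement negation; spelled out in C:

    /* With I915_EXEC_FENCE_SIGNAL == (1 << 1):                      */
    /* -(I915_EXEC_FENCE_SIGNAL << 1) == -(1 << 2) == 0xfffffffc     */
    /* i.e. every bit except WAIT (bit 0) and SIGNAL (bit 1) is set. */

Any undefined bit therefore trips the new -EINVAL check in i915_gem_execbuffer.c above, while keeping the remaining bits available for future flags.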
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67..ff21b4d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -376,7 +376,7 @@
that runtime stack traces are more reliable.
This is also a prerequisite for generation of ORC unwind data, which
- is needed for CONFIG_ORC_UNWINDER.
+ is needed for CONFIG_UNWINDER_ORC.
For more information, see
tools/objtool/Documentation/stack-validation.txt.
diff --git a/mm/gup.c b/mm/gup.c
index b2b4d42..dfcde13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1643,6 +1643,47 @@
return 1;
}
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset(current->mm, addr);
+ do {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ return;
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+ pages, nr))
+ return;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, write, pages, nr))
+ return;
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+ return;
+ } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * whether we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
+
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp;
+ unsigned long flags;
int nr = 0;
start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@
* block IPIs that come from THPs splitting.
*/
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = READ_ONCE(*pgdp);
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
- pages, &nr))
- break;
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, write, pages, &nr))
- break;
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ local_irq_save(flags);
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_restore(flags);
+ }
return nr;
}
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- return end >= start;
-}
-#endif
-
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -1735,12 +1744,22 @@
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
+ unsigned long addr, len, end;
int nr = 0, ret = 0;
start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
if (gup_fast_permitted(start, nr_pages, write)) {
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ local_irq_disable();
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_enable();
ret = nr;
}
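The generic gup_fast_permitted() above only rejects address-space wraparound; architectures can override it with a stronger check before the IRQ-disabled walk. A sketch of such an override (the TASK_SIZE_MAX bound is an assumption here, modeled on the x86 variant elsewhere in this series):

    /* Sketch: arch override rejecting wraparound and kernel addresses. */
    #define gup_fast_permitted gup_fast_permitted
    static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
                                          int write)
    {
            unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
            unsigned long end = start + len;

            return end >= start && end <= TASK_SIZE_MAX;
    }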
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77e4d3c..8dfd13f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5646,6 +5646,16 @@
unsigned long start_pfn, end_pfn;
int i, this_nid;
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (!mem_section) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_virt_alloc(size, align);
+ }
+#endif
+
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
memory_present(this_nid, start_pfn, end_pfn);
}
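The allocation above replaces what used to be a static array of NR_SECTION_ROOTS pointers; the quantities involved are (exact values are config-dependent):

    /* NR_MEM_SECTIONS   = 1 << (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)      */
    /* SECTIONS_PER_ROOT = PAGE_SIZE / sizeof(struct mem_section)           */
    /* NR_SECTION_ROOTS  = DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) */

With 5-level paging raising MAX_PHYSMEM_BITS, the static table would grow accordingly, which is what motivates deferring it to a boot-time memblock_virt_alloc().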
diff --git a/mm/sparse.c b/mm/sparse.c
index 4900707..0441388 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -23,8 +23,7 @@
* 1) mem_section - memory sections, mem_map's for valid memory
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;
@@ -101,7 +100,7 @@
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
- struct mem_section* root;
+ struct mem_section *root = NULL;
for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -112,7 +111,7 @@
break;
}
- VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+ VM_BUG_ON(!root);
return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
@@ -330,11 +329,17 @@
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
unsigned long usemap_snr, pgdat_snr;
- static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
- static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+ static unsigned long old_usemap_snr;
+ static unsigned long old_pgdat_snr;
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;
+ /* First call */
+ if (!old_usemap_snr) {
+ old_usemap_snr = NR_MEM_SECTIONS;
+ old_pgdat_snr = NR_MEM_SECTIONS;
+ }
+
usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579..4a72ee4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -376,6 +376,9 @@
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -423,9 +426,6 @@
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e6c06aa..1e2929f 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -133,6 +133,8 @@
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, group, ds->num_ports)
@@ -180,6 +182,8 @@
if (err)
return err;
}
+
+ return 0;
}
for_each_set_bit(port, members, ds->num_ports)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b2fc716..b6bb3cd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2615,7 +2615,6 @@
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
unsigned int mss = tcp_current_mss(sk);
- u32 prior_lost = tp->lost_out;
tcp_for_write_queue(skb, sk) {
if (skb == tcp_send_head(sk))
@@ -2632,7 +2631,7 @@
tcp_clear_retrans_hints_partial(tp);
- if (prior_lost == tp->lost_out)
+ if (!tp->lost_out)
return;
if (tcp_is_reno(tp))
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bb..b6a2aa1 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@
* is freed by GSO engine
*/
if (copy_destructor) {
+ int delta;
+
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
sum_truesize += skb->truesize;
- refcount_add(sum_truesize - gso_skb->truesize,
- &skb->sk->sk_wmem_alloc);
+ delta = sum_truesize - gso_skb->truesize;
+ /* In some pathological cases, delta can be negative.
+	 * We need to use either refcount_add() or refcount_sub_and_test().
+ */
+ if (likely(delta >= 0))
+ refcount_add(delta, &skb->sk->sk_wmem_alloc);
+ else
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
delta = htonl(oldlen + (skb_tail_pointer(skb) -
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf8..b4e421a 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@
break;
}
- /* XXX when can this fail? */
- ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
- rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+ rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
(long) ib_sg_dma_address(
ic->i_cm_id->device,
- &recv->r_frag->f_sg),
- ret);
+ &recv->r_frag->f_sg));
+
+ /* XXX when can this fail? */
+ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
if (ret) {
rds_ib_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index bb831d4..e63af4e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -259,7 +259,7 @@
__objtool_obj := $(objtree)/tools/objtool/objtool
-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
ifndef CONFIG_FRAME_POINTER
objtool_args += --no-fp
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 6598fb7..9816590 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -829,6 +829,7 @@
#define I915_EXEC_FENCE_WAIT (1<<0)
#define I915_EXEC_FENCE_SIGNAL (1<<1)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
__u32 flags;
};
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c0e26ad..9b341584 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1757,11 +1757,14 @@
if (insn->dead_end)
return 0;
- insn = next_insn;
- if (!insn) {
+ if (!next_insn) {
+ if (state.cfa.base == CFI_UNDEFINED)
+ return 0;
WARN("%s: unexpected end of section", sec->name);
return 1;
}
+
+ insn = next_insn;
}
return 0;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f91..07f3299 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@
printf("\n");
- exit(1);
+ exit(129);
}
static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@
break;
} else {
fprintf(stderr, "Unknown option: %s\n", cmd);
- fprintf(stderr, "\n Usage: %s\n",
- objtool_usage_string);
- exit(1);
+ cmd_usage();
}
(*argv)++;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 771ddab..d5d7fff 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1138,6 +1138,14 @@
return err;
}
+static void trace__symbols__exit(struct trace *trace)
+{
+ machine__exit(trace->host);
+ trace->host = NULL;
+
+ symbol__exit();
+}
+
static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
{
int idx;
@@ -2481,6 +2489,8 @@
}
out_delete_evlist:
+ trace__symbols__exit(trace);
+
perf_evlist__delete(evlist);
trace->evlist = NULL;
trace->live = false;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 6680e4f..0257295 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -5,6 +5,7 @@
%option stack
%option bison-locations
%option yylineno
+%option reject
%{
#include <errno.h>
@@ -339,8 +340,8 @@
{num_hex} { return value(yyscanner, 16); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
-{bpf_object} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_OBJECT); }
-{bpf_source} { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); }
+{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
{name} { return pmu_str_check(yyscanner); }
"/" { BEGIN(config); return '/'; }
- { return '-'; }
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 961e3ee..66e5ce5 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -115,7 +115,15 @@
return;
}
- if (ar != expected_ar) {
+ /* The SDM says "bits 19:16 are undefined". Thanks. */
+ ar &= ~0xF0000;
+
+ /*
+ * NB: Different Linux versions do different things with the
+ * accessed bit in set_thread_area().
+ */
+ if (ar != expected_ar &&
+ (ldt || ar != (expected_ar | AR_ACCESSED))) {
printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
(ldt ? "LDT" : "GDT"), index, ar, expected_ar);
nerrs++;
@@ -129,30 +137,51 @@
}
}
-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
- bool oldmode)
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+ bool oldmode, bool ldt)
{
- int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
- desc, sizeof(*desc));
- if (ret < -1)
- errno = -ret;
- if (ret == 0) {
- uint32_t limit = desc->limit;
- if (desc->limit_in_pages)
- limit = (limit << 12) + 4095;
- check_valid_segment(desc->entry_number, 1, ar, limit, true);
- return true;
- } else if (errno == ENOSYS) {
- printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ struct user_desc desc = *d;
+ int ret;
+
+ if (!ldt) {
+#ifndef __i386__
+ /* No point testing set_thread_area in a 64-bit build */
return false;
+#endif
+ if (!gdt_entry_num)
+ return false;
+ desc.entry_number = gdt_entry_num;
+
+ ret = syscall(SYS_set_thread_area, &desc);
} else {
- if (desc->seg_32bit) {
- printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ &desc, sizeof(desc));
+
+ if (ret < -1)
+ errno = -ret;
+
+ if (ret != 0 && errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ return false;
+ }
+ }
+
+ if (ret == 0) {
+ uint32_t limit = desc.limit;
+ if (desc.limit_in_pages)
+ limit = (limit << 12) + 4095;
+ check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+ return true;
+ } else {
+ if (desc.seg_32bit) {
+ printf("[FAIL]\tUnexpected %s failure %d\n",
+ ldt ? "modify_ldt" : "set_thread_area",
errno);
nerrs++;
return false;
} else {
- printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ printf("[OK]\t%s rejected 16 bit segment\n",
+ ldt ? "modify_ldt" : "set_thread_area");
return false;
}
}
@@ -160,7 +189,15 @@
static bool install_valid(const struct user_desc *desc, uint32_t ar)
{
- return install_valid_mode(desc, ar, false);
+ bool ret = install_valid_mode(desc, ar, false, true);
+
+ if (desc->contents <= 1 && desc->seg_32bit &&
+ !desc->seg_not_present) {
+ /* Should work in the GDT, too. */
+ install_valid_mode(desc, ar, false, false);
+ }
+
+ return ret;
}
static void install_invalid(const struct user_desc *desc, bool oldmode)
@@ -367,9 +404,24 @@
install_invalid(&desc, false);
desc.seg_not_present = 0;
- desc.read_exec_only = 0;
desc.seg_32bit = 1;
+ desc.read_exec_only = 0;
+ desc.limit = 0xfffff;
+
install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.contents = 1;
+ desc.read_exec_only = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+ desc.limit = 0;
install_invalid(&desc, true);
}
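The expected AR/limit pairs above follow the architectural granularity rule: with AR_G set, the 20-bit limit field is in 4 KiB units, matching what install_valid_mode() computes before calling check_valid_segment(). In C terms:

    /* With desc.limit_in_pages == 1 (AR_G): */
    uint32_t limit_bytes = (limit << 12) + 4095;  /* inclusive byte limit */

which is why each AR_G expectation in the new cases pairs with desc.limit = 0xfffff.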
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 555e43c..7a1cc0e 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -189,17 +189,29 @@
#define u64 uint64_t
#ifdef __i386__
-#define SYS_mprotect_key 380
-#define SYS_pkey_alloc 381
-#define SYS_pkey_free 382
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
+
#else
-#define SYS_mprotect_key 329
-#define SYS_pkey_alloc 330
-#define SYS_pkey_free 331
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
+
#endif
void dump_mem(void *dumpme, int len_bytes)
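Wrapping the syscall numbers in #ifndef lets toolchains whose headers already define SYS_pkey_alloc and friends take precedence, while the literal fallbacks keep the test building against older headers. Invocation is unchanged either way; a sketch:

    /* Sketch: the numbers only matter at the syscall() boundary. */
    int pkey = syscall(SYS_pkey_alloc, 0, 0);     /* flags, initial rights */
    syscall(SYS_mprotect_key, ptr, size, PROT_READ | PROT_WRITE, pkey);
    syscall(SYS_pkey_free, pkey);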