Merge remote-tracking branch 'android-4.9' into 'android-4.9-eas-dev'
Fixed merge conflicts caused by 8a174b4749d3 ("sched/fair: prevent
possible infinite loop in sched_group_energy)
Change-Id: Iba35631a213b88f6361838f28573af3407568919
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 3acc4f1..4a5a887 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -436,6 +436,8 @@
- info on the magic SysRq key.
target/
- directory with info on generating TCM v4 fabric .ko modules
+tee.txt
+ - info on the TEE subsystem and drivers
this_cpu_ops.txt
- List rationale behind and the way to use this_cpu operations.
thermal/
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 4987417..dfd56ec 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -350,3 +350,19 @@
Description: AArch64 CPU registers
'identification' directory exposes the CPU ID registers for
identifying model and revision of the CPU.
+
+What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/spectre_v1
+ /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date: January 2018
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Information about CPU vulnerabilities
+
+ The files are named after the code names of CPU
+ vulnerabilities. The output of those files reflects the
+ state of the CPUs in the system. Possible output values:
+
+ "Not affected" CPU is not affected by the vulnerability
+ "Vulnerable" CPU is affected and no mitigation in effect
+ "Mitigation: $M" CPU is affected and mitigation $M is in effect
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
new file mode 100644
index 0000000..d38834c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
@@ -0,0 +1,31 @@
+OP-TEE Device Tree Bindings
+
+OP-TEE is a piece of software using hardware features to provide a Trusted
+Execution Environment. The security can be provided with ARM TrustZone, but
+also by virtualization or a separate chip.
+
+We're using "linaro" as the first part of the compatible property for
+the reference implementation maintained by Linaro.
+
+* OP-TEE based on ARM TrustZone required properties:
+
+- compatible : should contain "linaro,optee-tz"
+
+- method : The method of calling the OP-TEE Trusted OS. Permitted
+ values are:
+
+ "smc" : SMC #0, with the register assignments specified
+ in drivers/tee/optee/optee_smc.h
+
+ "hvc" : HVC #0, with the register assignments specified
+ in drivers/tee/optee/optee_smc.h
+
+
+
+Example:
+ firmware {
+ optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/jc42.txt b/Documentation/devicetree/bindings/hwmon/jc42.txt
index 07a2504..f569db5 100644
--- a/Documentation/devicetree/bindings/hwmon/jc42.txt
+++ b/Documentation/devicetree/bindings/hwmon/jc42.txt
@@ -34,6 +34,10 @@
- reg: I2C address
+Optional properties:
+- smbus-timeout-disable: When set, the smbus timeout function will be disabled.
+ This is not supported on all chips.
+
Example:
temp-sensor@1a {
diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1c35e7b..03ab8f5 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -11,7 +11,7 @@
be used, but a device adhering to this binding may leave out all except
for usbVID,PID.
- reg: the port number which this device is connecting to, the range
- is 1-31.
+ is 1-255.
Example:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index bceffff..d0526e0 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -156,6 +156,7 @@
lantiq Lantiq Semiconductor
lenovo Lenovo Group Ltd.
lg LG Corporation
+linaro Linaro Limited
linux Linux-specific binding
lltc Linear Technology Corporation
lsi LSI Corp. (LSI Logic)
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 81c7f2b..efb38da 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -308,6 +308,7 @@
0xA3 80-8F Port ACL in development:
<mailto:tlewis@mindspring.com>
0xA3 90-9F linux/dtlk.h
+0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
0xAA 00-3F linux/uapi/linux/userfaultfd.h
0xAB 00-1F linux/nbd.h
0xAC 00-1F linux/raw.h
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 92e6297..29a998d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1909,6 +1909,12 @@
kernel and module base offset ASLR (Address Space
Layout Randomization).
+ kasan_multi_shot
+ [KNL] Enforce KASAN (Kernel Address Sanitizer) to print
+ report on every invalid memory access. Without this
+ parameter KASAN will print report only for the first
+ invalid access.
+
keepinitrd [HW,ARM]
kernelcore= [KNL,X86,IA-64,PPC]
@@ -2697,6 +2703,11 @@
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
+ nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+ (indirect branch prediction) vulnerability. System may
+ allow data leaks with this option, which is equivalent
+ to spectre_v2=off.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -2801,11 +2812,11 @@
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
+ nopcid [X86-64] Disable the PCID cpu feature.
+
norandmaps Don't use address space randomization. Equivalent to
echo 0 > /proc/sys/kernel/randomize_va_space
- noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops
-
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
with UP alternatives
@@ -3329,6 +3340,21 @@
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
+ pti= [X86_64] Control Page Table Isolation of user and
+ kernel address spaces. Disabling this feature
+ removes hardening, but improves performance of
+ system calls and interrupts.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable to issues that PTI mitigates
+
+ Not specifying this option is equivalent to pti=auto.
+
+ nopti [X86_64]
+ Equivalent to pti=off
+
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
@@ -3933,6 +3959,29 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/laptops/sonypi.txt
+ spectre_v2= [X86] Control mitigation of Spectre variant 2
+ (indirect branch speculation) vulnerability.
+
+ on - unconditionally enable
+ off - unconditionally disable
+ auto - kernel detects whether your CPU model is
+ vulnerable
+
+ Selecting 'on' will, and 'auto' may, choose a
+ mitigation method at run time according to the
+ CPU, the available microcode, the setting of the
+ CONFIG_RETPOLINE configuration option, and the
+ compiler with which the kernel was built.
+
+ Specific mitigations can also be selected manually:
+
+ retpoline - replace indirect branches
+ retpoline,generic - google's original retpoline
+ retpoline,amd - AMD-specific minimal thunk
+
+ Not specifying this option is equivalent to
+ spectre_v2=auto.
+
spia_io_base= [HW,MTD]
spia_fio_base=
spia_pedr=
diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
new file mode 100644
index 0000000..e9e6cba
--- /dev/null
+++ b/Documentation/speculation.txt
@@ -0,0 +1,90 @@
+This document explains potential effects of speculation, and how undesirable
+effects can be mitigated portably using common APIs.
+
+===========
+Speculation
+===========
+
+To improve performance and minimize average latencies, many contemporary CPUs
+employ speculative execution techniques such as branch prediction, performing
+work which may be discarded at a later stage.
+
+Typically speculative execution cannot be observed from architectural state,
+such as the contents of registers. However, in some cases it is possible to
+observe its impact on microarchitectural state, such as the presence or
+absence of data in caches. Such state may form side-channels which can be
+observed to extract secret information.
+
+For example, in the presence of branch prediction, it is possible for bounds
+checks to be ignored by code which is speculatively executed. Consider the
+following code:
+
+ int load_array(int *array, unsigned int index)
+ {
+ if (index >= MAX_ARRAY_ELEMS)
+ return 0;
+ else
+ return array[index];
+ }
+
+Which, on arm64, may be compiled to an assembly sequence such as:
+
+ CMP <index>, #MAX_ARRAY_ELEMS
+ B.LT less
+ MOV <returnval>, #0
+ RET
+ less:
+ LDR <returnval>, [<array>, <index>]
+ RET
+
+It is possible that a CPU mis-predicts the conditional branch, and
+speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
+value will subsequently be discarded, but the speculated load may affect
+microarchitectural state which can be subsequently measured.
+
+More complex sequences involving multiple dependent memory accesses may
+result in sensitive information being leaked. Consider the following
+code, building on the prior example:
+
+ int load_dependent_arrays(int *arr1, int *arr2, int index)
+ {
+ int val1, val2,
+
+ val1 = load_array(arr1, index);
+ val2 = load_array(arr2, val1);
+
+ return val2;
+ }
+
+Under speculation, the first call to load_array() may return the value
+of an out-of-bounds address, while the second call will influence
+microarchitectural state dependent on this value. This may provide an
+arbitrary read primitive.
+
+====================================
+Mitigating speculation side-channels
+====================================
+
+The kernel provides a generic API to ensure that bounds checks are
+respected even under speculation. Architectures which are affected by
+speculation-based side-channels are expected to implement these
+primitives.
+
+The array_index_nospec() helper in <linux/nospec.h> can be used to
+prevent information from being leaked via side-channels.
+
+A call to array_index_nospec(index, size) returns a sanitized index
+value that is bounded to [0, size) even under cpu speculation
+conditions.
+
+This can be used to protect the earlier load_array() example:
+
+ int load_array(int *array, unsigned int index)
+ {
+ if (index >= MAX_ARRAY_ELEMS)
+ return 0;
+ else {
+ index = array_index_nospec(index, MAX_ARRAY_ELEMS);
+ return array[index];
+ }
+ }
diff --git a/Documentation/tee.txt b/Documentation/tee.txt
new file mode 100644
index 0000000..56ea85f
--- /dev/null
+++ b/Documentation/tee.txt
@@ -0,0 +1,127 @@
+=============
+TEE subsystem
+=============
+
+This document describes the TEE subsystem in Linux.
+
+A TEE (Trusted Execution Environment) is a trusted OS running in some
+secure environment, for example, TrustZone on ARM CPUs, or a separate
+secure co-processor etc. A TEE driver handles the details needed to
+communicate with the TEE.
+
+This subsystem deals with:
+
+- Registration of TEE drivers
+
+- Managing shared memory between Linux and the TEE
+
+- Providing a generic API to the TEE
+
+The TEE interface
+=================
+
+include/uapi/linux/tee.h defines the generic interface to a TEE.
+
+User space (the client) connects to the driver by opening /dev/tee[0-9]* or
+/dev/teepriv[0-9]*.
+
+- TEE_IOC_SHM_ALLOC allocates shared memory and returns a file descriptor
+ which user space can mmap. When user space doesn't need the file
+ descriptor any more, it should be closed. When shared memory isn't needed
+ any longer it should be unmapped with munmap() to allow the reuse of
+ memory.
+
+- TEE_IOC_VERSION lets user space know which TEE this driver handles and
+ the its capabilities.
+
+- TEE_IOC_OPEN_SESSION opens a new session to a Trusted Application.
+
+- TEE_IOC_INVOKE invokes a function in a Trusted Application.
+
+- TEE_IOC_CANCEL may cancel an ongoing TEE_IOC_OPEN_SESSION or TEE_IOC_INVOKE.
+
+- TEE_IOC_CLOSE_SESSION closes a session to a Trusted Application.
+
+There are two classes of clients, normal clients and supplicants. The latter is
+a helper process for the TEE to access resources in Linux, for example file
+system access. A normal client opens /dev/tee[0-9]* and a supplicant opens
+/dev/teepriv[0-9].
+
+Much of the communication between clients and the TEE is opaque to the
+driver. The main job for the driver is to receive requests from the
+clients, forward them to the TEE and send back the results. In the case of
+supplicants the communication goes in the other direction, the TEE sends
+requests to the supplicant which then sends back the result.
+
+OP-TEE driver
+=============
+
+The OP-TEE driver handles OP-TEE [1] based TEEs. Currently it is only the ARM
+TrustZone based OP-TEE solution that is supported.
+
+Lowest level of communication with OP-TEE builds on ARM SMC Calling
+Convention (SMCCC) [2], which is the foundation for OP-TEE's SMC interface
+[3] used internally by the driver. Stacked on top of that is OP-TEE Message
+Protocol [4].
+
+OP-TEE SMC interface provides the basic functions required by SMCCC and some
+additional functions specific for OP-TEE. The most interesting functions are:
+
+- OPTEE_SMC_FUNCID_CALLS_UID (part of SMCCC) returns the version information
+ which is then returned by TEE_IOC_VERSION
+
+- OPTEE_SMC_CALL_GET_OS_UUID returns the particular OP-TEE implementation, used
+ to tell, for instance, a TrustZone OP-TEE apart from an OP-TEE running on a
+ separate secure co-processor.
+
+- OPTEE_SMC_CALL_WITH_ARG drives the OP-TEE message protocol
+
+- OPTEE_SMC_GET_SHM_CONFIG lets the driver and OP-TEE agree on which memory
+ range to used for shared memory between Linux and OP-TEE.
+
+The GlobalPlatform TEE Client API [5] is implemented on top of the generic
+TEE API.
+
+Picture of the relationship between the different components in the
+OP-TEE architecture::
+
+ User space Kernel Secure world
+ ~~~~~~~~~~ ~~~~~~ ~~~~~~~~~~~~
+ +--------+ +-------------+
+ | Client | | Trusted |
+ +--------+ | Application |
+ /\ +-------------+
+ || +----------+ /\
+ || |tee- | ||
+ || |supplicant| \/
+ || +----------+ +-------------+
+ \/ /\ | TEE Internal|
+ +-------+ || | API |
+ + TEE | || +--------+--------+ +-------------+
+ | Client| || | TEE | OP-TEE | | OP-TEE |
+ | API | \/ | subsys | driver | | Trusted OS |
+ +-------+----------------+----+-------+----+-----------+-------------+
+ | Generic TEE API | | OP-TEE MSG |
+ | IOCTL (TEE_IOC_*) | | SMCCC (OPTEE_SMC_CALL_*) |
+ +-----------------------------+ +------------------------------+
+
+RPC (Remote Procedure Call) are requests from secure world to kernel driver
+or tee-supplicant. An RPC is identified by a special range of SMCCC return
+values from OPTEE_SMC_CALL_WITH_ARG. RPC messages which are intended for the
+kernel are handled by the kernel driver. Other RPC messages will be forwarded to
+tee-supplicant without further involvement of the driver, except switching
+shared memory buffer representation.
+
+References
+==========
+
+[1] https://github.com/OP-TEE/optee_os
+
+[2] http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html
+
+[3] drivers/tee/optee/optee_smc.h
+
+[4] drivers/tee/optee/optee_msg.h
+
+[5] http://www.globalplatform.org/specificationsdevice.asp look for
+ "TEE Client API Specification v1.0" and click download.
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644
index 0000000..5cd5843
--- /dev/null
+++ b/Documentation/x86/pti.txt
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications. When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy. When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT). There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled. It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI. This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level. This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel. This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal. The only difference is when the kernel
+makes entries in the top (PGD) level. In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables. This leaves a single, shared set of
+userspace page tables to manage. One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important. But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+ a. Each process now needs an order-1 PGD instead of order-0.
+ (Consumes an additional 4k per process).
+ b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+ aligned so that it can be mapped by setting a single PMD
+ entry. This consumes nearly 2MB of RAM once the kernel
+ is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+ a. CR3 manipulation to switch between the page table copies
+ must be done at interrupt, syscall, and exception entry
+ and exit (it can be skipped when the kernel is interrupted,
+ though.) Moves to CR3 are on the order of a hundred
+ cycles, and are required at every entry and exit.
+ b. A "trampoline" must be used for SYSCALL entry. This
+ trampoline depends on a smaller set of resources than the
+ non-PTI SYSCALL entry code, so requires mapping fewer
+ things into the userspace page tables. The downside is
+ that stacks must be switched at entry time.
+ c. Global pages are disabled for all kernel structures not
+ mapped into both kernel and userspace page tables. This
+ feature of the MMU allows different processes to share TLB
+ entries mapping the kernel. Losing the feature means more
+ TLB misses after a context switch. The actual loss of
+ performance is very small, however, never exceeding 1%.
+ d. Process Context IDentifiers (PCID) is a CPU feature that
+ allows us to skip flushing the entire TLB when switching page
+ tables by setting a special bit in CR3 when the page tables
+ are changed. This makes switching the page tables (at context
+ switch, or kernel entry/exit) cheaper. But, on systems with
+ PCID support, the context switch code must flush both the user
+ and kernel entries out of the TLB. The user PCID TLB flush is
+ deferred until the exit to userspace, minimizing the cost.
+ See intel.com/sdm for the gory PCID/INVPCID details.
+ e. The userspace page tables must be populated for each new
+ process. Even without PTI, the shared kernel mappings
+ are created by copying top-level (PGD) entries into each
+ new process. But, with PTI, there are now *two* kernel
+ mappings: one in the kernel page tables that maps everything
+ and one for the entry/exit structures. At fork(), we need to
+ copy both.
+ f. In addition to the fork()-time copying, there must also
+ be an update to the userspace PGD any time a set_pgd() is done
+ on a PGD used to map userspace. This ensures that the kernel
+ and userspace copies always map the same userspace
+ memory.
+ g. On systems without PCID support, each CR3 write flushes
+ the entire TLB. That means that each syscall, interrupt
+ or exception flushes the TLB.
+ h. INVPCID is a TLB-flushing instruction which allows flushing
+ of TLB entries for non-current PCIDs. Some systems support
+ PCIDs, but do not support INVPCID. On these systems, addresses
+ can only be flushed from the TLB for the current PCID. When
+ flushing a kernel address, we need to flush all PCIDs, so a
+ single kernel address flush will require a TLB-flushing CR3
+ write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+ unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+ boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+ (excluding MPX and protection_keys) in a loop on multiple CPUs for
+ several minutes. These tests frequently uncover corner cases in the
+ kernel entry code. In general, old kernels might cause these tests
+ themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+ frequent performance monitoring non-maskable interrupts (see "NMI"
+ in /proc/interrupts). This exercises the NMI entry/exit code which
+ is known to trigger bugs in code paths that did not expect to be
+ interrupted, including nested NMIs. Using "-c" boosts the rate of
+ NMIs, and using two -c with separate counters encourages nested NMIs
+ and less deterministic behavior.
+
+ while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+ This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code. Usually a bug in one of the
+ more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup. Bugs
+ in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt. Caused by bugs in entry_64.S,
+ like screwing up a page table switch. Also caused by
+ incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI. The NMI code is separate from main
+ interrupt handlers and can have bugs that do not affect
+ normal interrupts. Also caused by incorrectly mapping NMI
+ code. NMIs that interrupt the entry code must be very
+ careful and can be the cause of crashes that show up when
+ running perf.
+ * Kernel crashes at the first exit to userspace. entry_64.S
+ bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+ in entry_64.S that return to userspace are sometimes separate
+ from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+ faults upon page faults. Caused by touching non-pti-mapped
+ data in the entry code, or forgetting to switch to kernel
+ CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+ as mount(8) failing to mount the rootfs. These have
+ tended to be TLB invalidation issues. Usually invalidating
+ the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index 63cefa6..a419303 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9010,6 +9010,11 @@
F: drivers/oprofile/
F: include/linux/oprofile.h
+OP-TEE DRIVER
+M: Jens Wiklander <jens.wiklander@linaro.org>
+S: Maintained
+F: drivers/tee/optee/
+
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
M: Mark Fasheh <mfasheh@versity.com>
M: Joel Becker <jlbec@evilplan.org>
@@ -10655,6 +10660,14 @@
F: include/linux/stm.h
F: include/uapi/linux/stm.h
+TEE SUBSYSTEM
+M: Jens Wiklander <jens.wiklander@linaro.org>
+S: Maintained
+F: include/linux/tee_drv.h
+F: include/uapi/linux/tee.h
+F: drivers/tee/
+F: Documentation/tee.txt
+
THUNDERBOLT DRIVER
M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
diff --git a/Makefile b/Makefile
index 7c9313c..aeb2708 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 66
+SUBLEVEL = 82
EXTRAVERSION =
NAME = Roaring Lionus
@@ -370,9 +370,6 @@
CFLAGS_KERNEL =
AFLAGS_KERNEL =
LDFLAGS_vmlinux =
-CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
-CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,)
-
# Use USERINCLUDE when you must reference the UAPI directories only.
USERINCLUDE := \
@@ -393,21 +390,19 @@
LINUXINCLUDE += $(filter-out $(LINUXINCLUDE),$(USERINCLUDE))
-KBUILD_CPPFLAGS := -D__KERNEL__
-
+KBUILD_AFLAGS := -D__ASSEMBLY__
KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -std=gnu89 $(call cc-option,-fno-PIE)
-
-
+ -std=gnu89
+KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_AFLAGS_KERNEL :=
KBUILD_CFLAGS_KERNEL :=
-KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
+GCC_PLUGINS_CFLAGS :=
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
@@ -420,7 +415,7 @@
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
-export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KCOV CFLAGS_KASAN CFLAGS_UBSAN
+export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@@ -620,6 +615,12 @@
# Defaults to vmlinux, but the arch makefile usually adds further targets
all: vmlinux
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS += $(call cc-option,-fno-PIE)
+CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
+CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,)
+export CFLAGS_GCOV CFLAGS_KCOV
+
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
# values of the respective KBUILD_* variables
ARCH_CPPFLAGS :=
@@ -801,6 +802,9 @@
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
+
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h
index 2b0ac42..412bb3c 100644
--- a/arch/alpha/kernel/pci_impl.h
+++ b/arch/alpha/kernel/pci_impl.h
@@ -143,7 +143,8 @@
};
#if defined(CONFIG_ALPHA_SRM) && \
- (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA))
+ (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA) || \
+ defined(CONFIG_ALPHA_AVANTI))
# define NEED_SRM_SAVE_RESTORE
#else
# undef NEED_SRM_SAVE_RESTORE
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index b483156..60c17b9 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -265,12 +265,13 @@
application calling fork. */
if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = regs->r20;
+ else
+ regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */
childti->pcb.usp = usp ?: rdusp();
*childregs = *regs;
childregs->r0 = 0;
childregs->r19 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */
- regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 74aceea..32ba92c 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -158,11 +158,16 @@
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
- if (i && ((i % 4) == 0))
- printk("\n ");
- printk("%016lx ", *stack++);
+ if ((i % 4) == 0) {
+ if (i)
+ pr_cont("\n");
+ printk(" ");
+ } else {
+ pr_cont(" ");
+ }
+ pr_cont("%016lx", *stack++);
}
- printk("\n");
+ pr_cont("\n");
dik_show_trace(sp);
}
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 41faf17..0684fd2 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -673,6 +673,7 @@
return 0;
__asm__ __volatile__(
+ " mov lp_count, %5 \n"
" lp 3f \n"
"1: ldb.ab %3, [%2, 1] \n"
" breq.d %3, 0, 3f \n"
@@ -689,8 +690,8 @@
" .word 1b, 4b \n"
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
- : "g"(-EFAULT), "l"(count)
- : "memory");
+ : "g"(-EFAULT), "r"(count)
+ : "lp_count", "lp_start", "lp_end", "memory");
return res;
}
diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 975c36e..8e6b393 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -668,6 +668,7 @@
ti,non-removable;
bus-width = <4>;
cap-power-off-card;
+ keep-power-in-suspend;
pinctrl-names = "default";
pinctrl-0 = <&mmc2_pins>;
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 7c9e0fa..65e0db1 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};
@@ -93,7 +93,7 @@
compatible = "arm,cortex-a9-twd-timer";
reg = <0x20600 0x20>;
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
- IRQ_TYPE_LEVEL_HIGH)>;
+ IRQ_TYPE_EDGE_RISING)>;
clocks = <&periph_clk>;
};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 064d84f..ce54a70 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -282,6 +282,7 @@
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+ bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
linux,pci-domain = <0>;
@@ -318,6 +319,7 @@
device_type = "pci";
ranges = <0x81000000 0 0 0x03000 0 0x00010000
0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+ bus-range = <0x00 0xff>;
#interrupt-cells = <1>;
num-lanes = <1>;
linux,pci-domain = <1>;
diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
index cf2f524..27cc913 100644
--- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
+++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
@@ -53,7 +53,8 @@
};
pinctrl: pin-controller@10000 {
- pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+ pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+ &pmx_gpio_header_gpo>;
pinctrl-names = "default";
pmx_uart0: pmx-uart0 {
@@ -85,11 +86,16 @@
* ground.
*/
pmx_gpio_header: pmx-gpio-header {
- marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+ marvell,pins = "mpp17", "mpp29", "mpp28",
"mpp35", "mpp34", "mpp40";
marvell,function = "gpio";
};
+ pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+ marvell,pins = "mpp7";
+ marvell,function = "gpo";
+ };
+
pmx_gpio_init: pmx-init {
marvell,pins = "mpp38";
marvell,function = "gpio";
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 08cce17..b4575bb 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -192,7 +192,7 @@
interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>;
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins &mmc1_cd>;
- cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>; /* gpio127 */
+ cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio127 */
vmmc-supply = <&vmmc1>;
bus-width = <4>;
cap-power-off-card;
@@ -249,9 +249,9 @@
OMAP3_CORE1_IOPAD(0x2110, PIN_INPUT | MUX_MODE0) /* cam_xclka.cam_xclka */
OMAP3_CORE1_IOPAD(0x2112, PIN_INPUT | MUX_MODE0) /* cam_pclk.cam_pclk */
- OMAP3_CORE1_IOPAD(0x2114, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */
- OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */
- OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */
+ OMAP3_CORE1_IOPAD(0x2116, PIN_INPUT | MUX_MODE0) /* cam_d0.cam_d0 */
+ OMAP3_CORE1_IOPAD(0x2118, PIN_INPUT | MUX_MODE0) /* cam_d1.cam_d1 */
+ OMAP3_CORE1_IOPAD(0x211a, PIN_INPUT | MUX_MODE0) /* cam_d2.cam_d2 */
OMAP3_CORE1_IOPAD(0x211c, PIN_INPUT | MUX_MODE0) /* cam_d3.cam_d3 */
OMAP3_CORE1_IOPAD(0x211e, PIN_INPUT | MUX_MODE0) /* cam_d4.cam_d4 */
OMAP3_CORE1_IOPAD(0x2120, PIN_INPUT | MUX_MODE0) /* cam_d5.cam_d5 */
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 714da33..5d49b60 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -11,6 +11,7 @@
CONFIG_NR_CPUS=8
CONFIG_AEABI=y
CONFIG_HIGHMEM=y
+CONFIG_CMA=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CPU_FREQ=y
@@ -35,6 +36,7 @@
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
CONFIG_BLK_DEV_SD=y
CONFIG_ATA=y
CONFIG_AHCI_SUNXI=y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 68b06f9..12f99fd 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -516,4 +516,22 @@
#endif
.endm
+ .macro bug, msg, line
+#ifdef CONFIG_THUMB2_KERNEL
+1: .inst 0xde02
+#else
+1: .inst 0xe7f001f2
+#endif
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+ .pushsection .rodata.str, "aMS", %progbits, 1
+2: .asciz "\msg"
+ .popsection
+ .pushsection __bug_table, "aw"
+ .align 2
+ .word 1b, 2b
+ .hword \line
+ .popsection
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089f..98d6de1 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
@@ -209,6 +208,7 @@
#define HSR_EC_IABT_HYP (0x21)
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3..683d923 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 1f59ea05..b7e0125 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -478,11 +478,10 @@
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
static inline unsigned long __must_check
-__copy_from_user(void *to, const void __user *from, unsigned long n)
+__arch_copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned int __ua_flags;
- check_object_size(to, n, false);
__ua_flags = uaccess_save_and_enable();
n = arm_copy_from_user(to, from, n);
uaccess_restore(__ua_flags);
@@ -495,18 +494,15 @@
__copy_to_user_std(void __user *to, const void *from, unsigned long n);
static inline unsigned long __must_check
-__copy_to_user(void __user *to, const void *from, unsigned long n)
+__arch_copy_to_user(void __user *to, const void *from, unsigned long n)
{
#ifndef CONFIG_UACCESS_WITH_MEMCPY
unsigned int __ua_flags;
-
- check_object_size(from, n, true);
__ua_flags = uaccess_save_and_enable();
n = arm_copy_to_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
#else
- check_object_size(from, n, true);
return arm_copy_to_user(to, from, n);
#endif
}
@@ -526,25 +522,49 @@
}
#else
-#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0)
-#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0)
+#define __arch_copy_from_user(to, from, n) \
+ (memcpy(to, (void __force *)from, n), 0)
+#define __arch_copy_to_user(to, from, n) \
+ (memcpy((void __force *)to, from, n), 0)
#define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0)
#endif
-static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+static inline unsigned long __must_check
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ check_object_size(to, n, false);
+ return __arch_copy_from_user(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
+
+ check_object_size(to, n, false);
+
if (likely(access_ok(VERIFY_READ, from, n)))
- res = __copy_from_user(to, from, n);
+ res = __arch_copy_from_user(to, from, n);
if (unlikely(res))
memset(to + (n - res), 0, res);
return res;
}
-static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
+static inline unsigned long __must_check
+__copy_to_user(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
+
+ return __arch_copy_to_user(to, from, n);
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ check_object_size(from, n, true);
+
if (access_ok(VERIFY_WRITE, to, n))
- n = __copy_to_user(to, from, n);
+ n = __arch_copy_to_user(to, from, n);
return n;
}
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 6391728..e056c9a 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -299,6 +299,8 @@
mov r2, sp
ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr
ldr lr, [r2, #\offset + S_PC]! @ get pc
+ tst r1, #PSR_I_BIT | 0x0f
+ bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -313,6 +315,7 @@
@ after ldm {}^
add sp, sp, #\offset + PT_REGS_SIZE
movs pc, lr @ return & move spsr_svc into cpsr
+1: bug "Returning to usermode but unexpected PSR bits set?", \@
#elif defined(CONFIG_CPU_V7M)
@ V7M restore.
@ Note that we don't need to do clrex here as clearing the local
@@ -328,6 +331,8 @@
ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr
ldr lr, [sp, #\offset + S_PC] @ get pc
add sp, sp, #\offset + S_SP
+ tst r1, #PSR_I_BIT | 0x0f
+ bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
@ We must avoid clrex due to Cortex-A15 erratum #830321
@@ -340,6 +345,7 @@
.endif
add sp, sp, #PT_REGS_SIZE - S_SP
movs pc, lr @ return & move spsr_svc into cpsr
+1: bug "Returning to usermode but unexpected PSR bits set?", \@
#endif /* !CONFIG_THUMB2_KERNEL */
.endm
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 19b5f5c..c38bfbe 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1165,6 +1165,7 @@
cpu_hyp_reset();
return NOTIFY_OK;
+ case CPU_PM_ENTER_FAILED:
case CPU_PM_EXIT:
if (__this_cpu_read(kvm_arm_hardware_enabled))
/* The hardware was enabled before suspend. */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 066b6d4..4e57ebc 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,7 +38,7 @@
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -47,7 +47,16 @@
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ /*
+ * "If an SMC instruction executed at Non-secure EL1 is
+ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
+ * Trap exception, not a Secure Monitor Call exception [...]"
+ *
+ * We need to advance the PC after the trap, as it would
+ * otherwise return to the same address...
+ */
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
@@ -79,7 +88,19 @@
return 1;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+ hsr);
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
@@ -98,13 +119,6 @@
{
u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x\n",
- (unsigned int)kvm_vcpu_get_hsr(vcpu));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b6e715f..dac7ceb 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -112,7 +112,7 @@
}
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- data);
+ &data);
data = vcpu_data_host_to_guest(vcpu, data, len);
vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
}
@@ -182,14 +182,14 @@
data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
len);
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
kvm_mmio_write_buf(data_buf, len, data);
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
} else {
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
- fault_ipa, 0);
+ fault_ipa, NULL);
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
data_buf);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2206e0e..2a35c19 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1284,7 +1284,7 @@
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma) && !logging_active) {
+ if (vma_kernel_pagesize(vma) && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
diff --git a/arch/arm/mach-omap1/dma.c b/arch/arm/mach-omap1/dma.c
index f6ba589..c821c1d 100644
--- a/arch/arm/mach-omap1/dma.c
+++ b/arch/arm/mach-omap1/dma.c
@@ -32,7 +32,6 @@
#include "soc.h"
#define OMAP1_DMA_BASE (0xfffed800)
-#define OMAP1_LOGICAL_DMA_CH_COUNT 17
static u32 enable_1510_mode;
@@ -348,8 +347,6 @@
goto exit_iounmap;
}
- d->lch_count = OMAP1_LOGICAL_DMA_CH_COUNT;
-
/* Valid attributes for omap1 plus processors */
if (cpu_is_omap15xx())
d->dev_caps = ENABLE_1510_MODE;
@@ -366,13 +363,14 @@
d->dev_caps |= CLEAR_CSR_ON_READ;
d->dev_caps |= IS_WORD_16;
- if (cpu_is_omap15xx())
- d->chan_count = 9;
- else if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
- if (!(d->dev_caps & ENABLE_1510_MODE))
- d->chan_count = 16;
+ /* available logical channels */
+ if (cpu_is_omap15xx()) {
+ d->lch_count = 9;
+ } else {
+ if (d->dev_caps & ENABLE_1510_MODE)
+ d->lch_count = 9;
else
- d->chan_count = 9;
+ d->lch_count = 16;
}
p = dma_plat_info;
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 8633c70..2944af8 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -367,7 +367,7 @@
return ret;
}
-void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
+int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data)
{
int err;
struct device *dev = &gpmc_onenand_device.dev;
@@ -393,15 +393,17 @@
if (err < 0) {
dev_err(dev, "Cannot request GPMC CS %d, error %d\n",
gpmc_onenand_data->cs, err);
- return;
+ return err;
}
gpmc_onenand_resource.end = gpmc_onenand_resource.start +
ONENAND_IO_SIZE - 1;
- if (platform_device_register(&gpmc_onenand_device) < 0) {
+ err = platform_device_register(&gpmc_onenand_device);
+ if (err) {
dev_err(dev, "Unable to register OneNAND device\n");
gpmc_cs_free(gpmc_onenand_data->cs);
- return;
}
+
+ return err;
}
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 1cc4a6f..bca5415 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3828,16 +3828,20 @@
* Return: 0 if device named @dev_name is not likely to be accessible,
* or 1 if it is likely to be accessible.
*/
-static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
- const char *dev_name)
+static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus,
+ const char *dev_name)
{
+ struct device_node *node;
+ bool available;
+
if (!bus)
- return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0;
+ return omap_type() == OMAP2_DEVICE_TYPE_GP;
- if (of_device_is_available(of_find_node_by_name(bus, dev_name)))
- return 1;
+ node = of_get_child_by_name(bus, dev_name);
+ available = of_device_is_available(node);
+ of_node_put(node);
- return 0;
+ return available;
}
int __init omap3xxx_hwmod_init(void)
@@ -3906,15 +3910,20 @@
if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) {
r = omap_hwmod_register_links(h_sham);
- if (r < 0)
+ if (r < 0) {
+ of_node_put(bus);
return r;
+ }
}
if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) {
r = omap_hwmod_register_links(h_aes);
- if (r < 0)
+ if (r < 0) {
+ of_node_put(bus);
return r;
+ }
}
+ of_node_put(bus);
/*
* Register hwmod links specific to certain ES levels of a
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 770216b..88676fe 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -147,7 +147,7 @@
.nshutdown_gpio = 137,
.dev_name = "/dev/ttyO1",
.flow_cntrl = 1,
- .baud_rate = 300000,
+ .baud_rate = 3000000,
};
static struct platform_device wl18xx_device = {
@@ -162,7 +162,7 @@
.nshutdown_gpio = 162,
.dev_name = "/dev/ttyO1",
.flow_cntrl = 1,
- .baud_rate = 300000,
+ .baud_rate = 3000000,
};
static struct platform_device wl128x_device = {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ab77100..00e9e79 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -930,13 +930,31 @@
__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
}
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scattertable. This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ * a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ * as we will try to flush the memory through a different alias to that
+ * actually being used (and the flushes are redundant.)
+ */
int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size,
unsigned long attrs)
{
- struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+ unsigned long pfn = dma_to_pfn(dev, handle);
+ struct page *page;
int ret;
+ /* If the PFN is not valid, we do not have a struct page */
+ if (!pfn_valid(pfn))
+ return -ENXIO;
+
+ page = pfn_to_page(pfn);
+
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (unlikely(ret))
return ret;
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a4ec240..3eb018f 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -433,6 +433,7 @@
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
@@ -455,15 +456,7 @@
/* another task is sharing our hash bucket */
continue;
- if (ri->rp && ri->rp->handler) {
- __this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
- ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
-
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -475,6 +468,33 @@
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (ri->rp && ri->rp->handler) {
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index 9775de2..a48354d 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -976,7 +976,10 @@
void __naked __kprobes_test_case_start(void)
{
__asm__ __volatile__ (
- "stmdb sp!, {r4-r11} \n\t"
+ "mov r2, sp \n\t"
+ "bic r3, r2, #7 \n\t"
+ "mov sp, r3 \n\t"
+ "stmdb sp!, {r2-r11} \n\t"
"sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
"bic r0, lr, #1 @ r0 = inline data \n\t"
"mov r1, sp \n\t"
@@ -996,7 +999,8 @@
"movne pc, r0 \n\t"
"mov r0, r4 \n\t"
"add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
- "ldmia sp!, {r4-r11} \n\t"
+ "ldmia sp!, {r2-r11} \n\t"
+ "mov sp, r2 \n\t"
"mov pc, r0 \n\t"
);
}
@@ -1012,7 +1016,8 @@
"bxne r0 \n\t"
"mov r0, r4 \n\t"
"add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
- "ldmia sp!, {r4-r11} \n\t"
+ "ldmia sp!, {r2-r11} \n\t"
+ "mov sp, r2 \n\t"
"bx r0 \n\t"
);
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fbf07a6..5702e7d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -733,6 +733,18 @@
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
+config UNMAP_KERNEL_AT_EL0
+ bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to bypass MMU permission checks and leak kernel data to
+ userspace. This can be defended against by unmapping the kernel
+ when running in userspace, mapping it back in on exception entry
+ via a trampoline page in the vector table.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f736cda..c76311c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -14,8 +14,12 @@
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
GZFLAGS :=-9
-ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux += -pie -shared -Bsymbolic
+ifeq ($(CONFIG_RELOCATABLE), y)
+# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+# for relative relocs, since this leads to better Image compression
+# with the relocation offsets always being zero.
+LDFLAGS_vmlinux += -pie -shared -Bsymbolic \
+ $(call ld-option, --no-apply-dynamic-relocs)
endif
ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 7c4218e..e008b76c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -429,17 +429,4 @@
mrs \rd, sp_el0
.endm
-/*
- * Errata workaround post TTBR0_EL1 update.
- */
- .macro post_ttbr0_update_workaround
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
- ic iallu
- dsb nsh
- isb
-alternative_else_nop_endif
-#endif
- .endm
-
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 87b4465..4a4a98a 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -35,6 +35,8 @@
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
-#define ARM64_NCAPS 16
+#define ARM64_UNMAP_KERNEL_AT_EL0 23
+
+#define ARM64_NCAPS 24
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 7278ce7..6c4f9e8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -85,12 +85,14 @@
if (mm != current->active_mm) {
/*
* Update the current thread's saved ttbr0 since it is
- * restored as part of a return from exception. Set
- * the hardware TTBR0_EL1 using cpu_switch_mm()
- * directly to enable potential errata workarounds.
+ * restored as part of a return from exception. Enable
+ * access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from
+ * exception when invoking the EFI run-time services.
*/
update_saved_ttbr0(current, mm);
- cpu_switch_mm(mm->pgd, mm);
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
} else {
/*
* Defer the switch to the current thread's TTBR0_EL1
@@ -98,7 +100,7 @@
* thread's saved ttbr0 corresponding to its active_mm
* (if different from init_mm).
*/
- cpu_set_reserved_ttbr0();
+ uaccess_ttbr0_disable();
if (current->active_mm != &init_mm)
update_saved_ttbr0(current, current->active_mm);
}
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d14c478..85997c0 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -175,6 +175,12 @@
#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \
ESR_ELx_SYS64_ISS_DIR_READ)
+#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index caf86be..d8e5805 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -51,6 +51,12 @@
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ FIX_ENTRY_TRAMP_DATA,
+ FIX_ENTRY_TRAMP_TEXT,
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7803343..77a27af 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -78,8 +78,16 @@
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
+#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
+#else
+#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
+#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
+#endif
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2a2752b..0dbc1c6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index f43680f..30cd697 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -165,6 +165,11 @@
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
+static inline unsigned long kaslr_offset(void)
+{
+ return kimage_vaddr - KIMAGE_VADDR;
+}
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 8d9fce0..d7750fc 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -16,6 +16,12 @@
#ifndef __ASM_MMU_H
#define __ASM_MMU_H
+
+#define USER_ASID_FLAG (UL(1) << 48)
+#define TTBR_ASID_MASK (UL(0xffff) << 48)
+
+#ifndef __ASSEMBLY__
+
typedef struct {
atomic64_t id;
void *vdso;
@@ -28,6 +34,12 @@
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
+static inline bool arm64_kernel_unmapped_at_el0(void)
+{
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
+ cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0);
+}
+
extern void paging_init(void);
extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
@@ -37,4 +49,5 @@
pgprot_t prot, bool allow_block_mappings);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 63e9982..6ad61e7 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -51,6 +51,13 @@
isb();
}
+static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUG_ON(pgd == swapper_pg_dir);
+ cpu_set_reserved_ttbr0();
+ cpu_do_switch_mm(virt_to_phys(pgd),mm);
+}
+
/*
* TCR.T0SZ value to use when the ID map is active. Usually equals
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
@@ -169,9 +176,10 @@
struct mm_struct *mm)
{
if (system_uses_ttbr0_pan()) {
+ u64 ttbr;
BUG_ON(mm->pgd == swapper_pg_dir);
- task_thread_info(tsk)->ttbr0 =
- virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
}
#else
@@ -219,5 +227,6 @@
#define activate_mm(prev,next) switch_mm(prev, next, current)
void verify_cpu_asid_bits(void);
+void post_ttbr_update_workaround(void);
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd..8df4cb6 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -272,6 +272,7 @@
#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
+#define TCR_A1 (UL(1) << 22)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2142c77..84b5283 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -34,8 +34,16 @@
#include <asm/pgtable-types.h>
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
+#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
+#else
+#define PROT_DEFAULT _PROT_DEFAULT
+#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -48,6 +56,7 @@
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
@@ -55,15 +64,15 @@
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
-#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
-#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
+#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
+#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
+#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1af9f3c..7bcd8b9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -692,6 +692,7 @@
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 14ad6e4..16cef2e 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -35,12 +35,6 @@
#include <asm/memory.h>
-#define cpu_switch_mm(pgd,mm) \
-do { \
- BUG_ON(pgd == swapper_pg_dir); \
- cpu_do_switch_mm(virt_to_phys(pgd),mm); \
-} while (0)
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index deab523..ad6bd8b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <asm/cputype.h>
+#include <asm/mmu.h>
/*
* Raw TLBI operations.
@@ -42,6 +43,11 @@
#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
+#define __tlbi_user(op, arg) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi(op, (arg) | USER_ASID_FLAG); \
+} while (0)
+
/*
* TLB Management
* ==============
@@ -103,6 +109,7 @@
dsb(ishst);
__tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
dsb(ish);
}
@@ -113,6 +120,7 @@
dsb(ishst);
__tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
dsb(ish);
}
@@ -139,10 +147,13 @@
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
- if (last_level)
+ if (last_level) {
__tlbi(vale1is, addr);
- else
+ __tlbi_user(vale1is, addr);
+ } else {
__tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
+ }
}
dsb(ish);
}
@@ -182,6 +193,7 @@
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
__tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
dsb(ish);
}
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035..47a9066 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -37,18 +37,11 @@
void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 31b6940..95cbade 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -20,6 +20,7 @@
#include <asm/alternative.h>
#include <asm/kernel-pgtable.h>
+#include <asm/mmu.h>
#include <asm/sysreg.h>
#ifndef __ASSEMBLY__
@@ -130,17 +131,23 @@
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void __uaccess_ttbr0_disable(void)
{
- unsigned long ttbr;
+ unsigned long flags, ttbr;
+ local_irq_save(flags);
+ ttbr = read_sysreg(ttbr1_el1);
+ ttbr &= ~TTBR_ASID_MASK;
/* reserved_ttbr0 placed at the end of swapper_pg_dir */
- ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
- write_sysreg(ttbr, ttbr0_el1);
+ write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1);
isb();
+ /* Set reserved ASID */
+ write_sysreg(ttbr, ttbr1_el1);
+ isb();
+ local_irq_restore(flags);
}
static inline void __uaccess_ttbr0_enable(void)
{
- unsigned long flags;
+ unsigned long flags, ttbr0, ttbr1;
/*
* Disable interrupts to avoid preemption between reading the 'ttbr0'
@@ -148,7 +155,17 @@
* roll-over and an update of 'ttbr0'.
*/
local_irq_save(flags);
- write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+ ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
+
+ /* Restore active ASID */
+ ttbr1 = read_sysreg(ttbr1_el1);
+ ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
+ ttbr1 |= ttbr0 & TTBR_ASID_MASK;
+ write_sysreg(ttbr1, ttbr1_el1);
+ isb();
+
+ /* Restore user page table */
+ write_sysreg(ttbr0, ttbr0_el1);
isb();
local_irq_restore(flags);
}
@@ -435,51 +452,62 @@
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ bic \tmp1, \tmp1, #TTBR_ASID_MASK
add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
isb
+ sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE
+ msr ttbr1_el1, \tmp1 // set reserved ASID
+ isb
.endm
- .macro __uaccess_ttbr0_enable, tmp1
+ .macro __uaccess_ttbr0_enable, tmp1, tmp2
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ mrs \tmp2, ttbr1_el1
+ extr \tmp2, \tmp2, \tmp1, #48
+ ror \tmp2, \tmp2, #16
+ msr ttbr1_el1, \tmp2 // set the active ASID
+ isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
- .macro uaccess_ttbr0_disable, tmp1
-alternative_if_not ARM64_HAS_PAN
- __uaccess_ttbr0_disable \tmp1
-alternative_else_nop_endif
- .endm
-
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
alternative_if_not ARM64_HAS_PAN
save_and_disable_irq \tmp2 // avoid preemption
- __uaccess_ttbr0_enable \tmp1
+ __uaccess_ttbr0_disable \tmp1
restore_irq \tmp2
alternative_else_nop_endif
.endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp3 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1, \tmp2
+ restore_irq \tmp3
+alternative_else_nop_endif
+ .endm
#else
- .macro uaccess_ttbr0_disable, tmp1
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
.endm
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
.endm
#endif
/*
* These macros are no-ops when UAO is present.
*/
- .macro uaccess_disable_not_uao, tmp1
- uaccess_ttbr0_disable \tmp1
+ .macro uaccess_disable_not_uao, tmp1, tmp2
+ uaccess_ttbr0_disable \tmp1, \tmp2
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(1)
alternative_else_nop_endif
.endm
- .macro uaccess_enable_not_uao, tmp1, tmp2
- uaccess_ttbr0_enable \tmp1, \tmp2
+ .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
+ uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 433f03e..b6c9389 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -24,6 +24,7 @@
#include <linux/kvm_host.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
+#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -147,11 +148,14 @@
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
-
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
+ BLANK();
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
+#endif
return 0;
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0127e1b..bfbd9e9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -748,6 +748,40 @@
return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
+
+static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
+ int __unused)
+{
+ /* Forced on command line? */
+ if (__kpti_forced) {
+ pr_info_once("kernel page table isolation forced %s by command line option\n",
+ __kpti_forced > 0 ? "ON" : "OFF");
+ return __kpti_forced > 0;
+ }
+
+ /* Useful for KASLR robustness */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return true;
+
+ return false;
+}
+
+static int __init parse_kpti(char *str)
+{
+ bool enabled;
+ int ret = strtobool(str, &enabled);
+
+ if (ret)
+ return ret;
+
+ __kpti_forced = enabled ? 1 : -1;
+ return 0;
+}
+__setup("kpti=", parse_kpti);
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -831,6 +865,13 @@
.def_scope = SCOPE_SYSTEM,
.matches = hyp_offset_low,
},
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ {
+ .capability = ARM64_UNMAP_KERNEL_AT_EL0,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = unmap_kernel_at_el0,
+ },
+#endif
{},
};
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c44a933..32de484 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,6 +29,7 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/uaccess.h>
@@ -70,8 +71,31 @@
#define BAD_FIQ 2
#define BAD_ERROR 3
- .macro kernel_entry, el, regsize = 64
+ .macro kernel_ventry, el, label, regsize = 64
+ .align 7
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+ .if \el == 0
+ .if \regsize == 64
+ mrs x30, tpidrro_el0
+ msr tpidrro_el0, xzr
+ .else
+ mov x30, xzr
+ .endif
+ .endif
+alternative_else_nop_endif
+#endif
+
sub sp, sp, #S_FRAME_SIZE
+ b el\()\el\()_\label
+ .endm
+
+ .macro tramp_alias, dst, sym
+ mov_q \dst, TRAMP_VALIAS
+ add \dst, \dst, #(\sym - .entry.tramp.text)
+ .endm
+
+ .macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
@@ -128,7 +152,7 @@
.if \el != 0
mrs x21, ttbr0_el1
- tst x21, #0xffff << 48 // Check for the reserved ASID
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
@@ -191,7 +215,7 @@
tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
.endif
- __uaccess_ttbr0_enable x0
+ __uaccess_ttbr0_enable x0, x1
.if \el == 0
/*
@@ -200,7 +224,7 @@
* Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
* corruption).
*/
- post_ttbr0_update_workaround
+ bl post_ttbr_update_workaround
.endif
1:
.if \el != 0
@@ -212,18 +236,20 @@
.if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
+ tst x22, #PSR_MODE32_BIT // native task?
+ b.eq 3f
+
#ifdef CONFIG_ARM64_ERRATUM_845719
alternative_if ARM64_WORKAROUND_845719
- tbz x22, #4, 1f
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
msr contextidr_el1, x29
#else
msr contextidr_el1, xzr
#endif
-1:
alternative_else_nop_endif
#endif
+3:
.endif
msr elr_el1, x21 // set up the return data
@@ -245,7 +271,21 @@
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
- eret // return to kernel
+
+ .if \el == 0
+alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ bne 4f
+ msr far_el1, x30
+ tramp_alias x30, tramp_exit_native
+ br x30
+4:
+ tramp_alias x30, tramp_exit_compat
+ br x30
+#endif
+ .else
+ eret
+ .endif
.endm
.macro irq_stack_entry
@@ -316,31 +356,31 @@
.align 11
ENTRY(vectors)
- ventry el1_sync_invalid // Synchronous EL1t
- ventry el1_irq_invalid // IRQ EL1t
- ventry el1_fiq_invalid // FIQ EL1t
- ventry el1_error_invalid // Error EL1t
+ kernel_ventry 1, sync_invalid // Synchronous EL1t
+ kernel_ventry 1, irq_invalid // IRQ EL1t
+ kernel_ventry 1, fiq_invalid // FIQ EL1t
+ kernel_ventry 1, error_invalid // Error EL1t
- ventry el1_sync // Synchronous EL1h
- ventry el1_irq // IRQ EL1h
- ventry el1_fiq_invalid // FIQ EL1h
- ventry el1_error_invalid // Error EL1h
+ kernel_ventry 1, sync // Synchronous EL1h
+ kernel_ventry 1, irq // IRQ EL1h
+ kernel_ventry 1, fiq_invalid // FIQ EL1h
+ kernel_ventry 1, error_invalid // Error EL1h
- ventry el0_sync // Synchronous 64-bit EL0
- ventry el0_irq // IRQ 64-bit EL0
- ventry el0_fiq_invalid // FIQ 64-bit EL0
- ventry el0_error_invalid // Error 64-bit EL0
+ kernel_ventry 0, sync // Synchronous 64-bit EL0
+ kernel_ventry 0, irq // IRQ 64-bit EL0
+ kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
+ kernel_ventry 0, error_invalid // Error 64-bit EL0
#ifdef CONFIG_COMPAT
- ventry el0_sync_compat // Synchronous 32-bit EL0
- ventry el0_irq_compat // IRQ 32-bit EL0
- ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
- ventry el0_error_invalid_compat // Error 32-bit EL0
+ kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_invalid_compat, 32 // Error 32-bit EL0
#else
- ventry el0_sync_invalid // Synchronous 32-bit EL0
- ventry el0_irq_invalid // IRQ 32-bit EL0
- ventry el0_fiq_invalid // FIQ 32-bit EL0
- ventry el0_error_invalid // Error 32-bit EL0
+ kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
END(vectors)
@@ -860,6 +900,119 @@
.popsection // .entry.text
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+
+ .macro tramp_map_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ sub \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ bic \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+#ifdef CONFIG_ARCH_MSM8996
+ /* ASID already in \tmp[63:48] */
+ movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
+ movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
+ /* 2MB boundary containing the vectors, so we nobble the walk cache */
+ movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
+ isb
+ tlbi vae1, \tmp
+ dsb nsh
+#endif /* CONFIG_ARCH_MSM8996 */
+ .endm
+
+ .macro tramp_unmap_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ add \tmp, \tmp, #(SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ orr \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+ /*
+ * We avoid running the post_ttbr_update_workaround here because the
+ * user and kernel ASIDs don't have conflicting mappings, so any
+ * "blessing" as described in:
+ *
+ * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
+ *
+ * will not hurt correctness. Whilst this may partially defeat the
+ * point of using split ASIDs in the first place, it avoids
+ * the hit of invalidating the entire I-cache on every return to
+ * userspace.
+ */
+ .endm
+
+ .macro tramp_ventry, regsize = 64
+ .align 7
+1:
+ .if \regsize == 64
+ msr tpidrro_el0, x30 // Restored in kernel_ventry
+ .endif
+ bl 2f
+ b .
+2:
+ tramp_map_kernel x30
+#ifdef CONFIG_RANDOMIZE_BASE
+ adr x30, tramp_vectors + PAGE_SIZE
+#ifndef CONFIG_ARCH_MSM8996
+ isb
+#endif
+ ldr x30, [x30]
+#else
+ ldr x30, =vectors
+#endif
+ prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ msr vbar_el1, x30
+ add x30, x30, #(1b - tramp_vectors)
+ isb
+ ret
+ .endm
+
+ .macro tramp_exit, regsize = 64
+ adr x30, tramp_vectors
+ msr vbar_el1, x30
+ tramp_unmap_kernel x30
+ .if \regsize == 64
+ mrs x30, far_el1
+ .endif
+ eret
+ .endm
+
+ .align 11
+ENTRY(tramp_vectors)
+ .space 0x400
+
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+END(tramp_vectors)
+
+ENTRY(tramp_exit_native)
+ tramp_exit
+END(tramp_exit_native)
+
+ENTRY(tramp_exit_compat)
+ tramp_exit 32
+END(tramp_exit_compat)
+
+ .ltorg
+ .popsection // .entry.tramp.text
+#ifdef CONFIG_RANDOMIZE_BASE
+ .pushsection ".rodata", "a"
+ .align PAGE_SHIFT
+ .globl __entry_tramp_data_start
+__entry_tramp_data_start:
+ .quad vectors
+ .popsection // .rodata
+#endif /* CONFIG_RANDOMIZE_BASE */
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
/*
* Special system call wrappers.
*/
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2a47416..7e365ff 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -321,6 +321,15 @@
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
+ /*
+ * In case p was allocated the same task_struct pointer as some
+ * other recently-exited task, make sure p is disassociated from
+ * any cpu that may have run that now-exited task recently.
+ * Otherwise we could erroneously skip reloading the FPSIMD
+ * registers for p.
+ */
+ fpsimd_flush_task_state(p);
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -363,17 +372,17 @@
static void tls_thread_switch(struct task_struct *next)
{
- unsigned long tpidr, tpidrro;
+ unsigned long tpidr;
tpidr = read_sysreg(tpidr_el0);
*task_user_tls(current) = tpidr;
- tpidr = *task_user_tls(next);
- tpidrro = is_compat_thread(task_thread_info(next)) ?
- next->thread.tp_value : 0;
+ if (is_compat_thread(task_thread_info(next)))
+ write_sysreg(next->thread.tp_value, tpidrro_el0);
+ else if (!arm64_kernel_unmapped_at_el0())
+ write_sysreg(0, tpidrro_el0);
- write_sysreg(tpidr, tpidr_el0);
- write_sysreg(tpidrro, tpidrro_el0);
+ write_sysreg(*task_user_tls(next), tpidr_el0);
}
/* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index fe014d6..6a9dc51 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -339,11 +339,11 @@
static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
void *p)
{
- u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+ const unsigned long offset = kaslr_offset();
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
- pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
- kaslr_offset, KIMAGE_VADDR);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && offset > 0) {
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ offset, KIMAGE_VADDR);
} else {
pr_emerg("Kernel Offset: disabled\n");
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index dc1c9fc..5fd787a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -33,6 +33,7 @@
#include <linux/syscalls.h>
#include <asm/atomic.h>
+#include <asm/barrier.h>
#include <asm/bug.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
@@ -495,6 +496,25 @@
regs->pc += 4;
}
+static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+ int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+ isb();
+ if (rt != 31)
+ regs->regs[rt] = arch_counter_get_cntvct();
+ regs->pc += 4;
+}
+
+static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+ int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+ if (rt != 31)
+ regs->regs[rt] = read_sysreg(cntfrq_el0);
+ regs->pc += 4;
+}
+
struct sys64_hook {
unsigned int esr_mask;
unsigned int esr_val;
@@ -513,6 +533,18 @@
.esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ,
.handler = ctr_read_handler,
},
+ {
+ /* Trap read access to CNTVCT_EL0 */
+ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT,
+ .handler = cntvct_read_handler,
+ },
+ {
+ /* Trap read access to CNTFRQ_EL0 */
+ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
+ .handler = cntfrq_read_handler,
+ },
{},
};
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index b8deffa..34d3ed6 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -56,6 +56,17 @@
#define HIBERNATE_TEXT
#endif
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_TEXT \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
+ *(.entry.tramp.text) \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
+#else
+#define TRAMP_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -128,6 +139,7 @@
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
+ TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -221,6 +233,11 @@
. += RESERVED_TTBR0_SIZE;
#endif
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ tramp_pg_dir = .;
+ . += PAGE_SIZE;
+#endif
+
_end = .;
STABS_DEBUG
@@ -240,7 +257,10 @@
ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
<= SZ_4K, "Hibernate exit text too big or misaligned")
#endif
-
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ "Entry trampoline text too big")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a204adf..2e6e9e9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -44,7 +44,7 @@
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -53,7 +53,7 @@
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -125,7 +125,19 @@
return ret;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+ hsr, esr_get_class_string(hsr));
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
@@ -151,13 +163,6 @@
u32 hsr = kvm_vcpu_get_hsr(vcpu);
u8 hsr_ec = ESR_ELx_EC(hsr);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index d7150e3..07c7ad9 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -30,7 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
- uaccess_enable_not_uao x2, x3
+ uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -50,7 +50,7 @@
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
- uaccess_disable_not_uao x2
+ uaccess_disable_not_uao x2, x3
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index cfe1339..c7a7d96 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -64,10 +64,10 @@
end .req x5
ENTRY(__arch_copy_from_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 718b1c4..e8bfaf1 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -65,10 +65,10 @@
end .req x5
ENTRY(__copy_in_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index e99e31c..f6cfcc0 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -63,10 +63,10 @@
end .req x5
ENTRY(__arch_copy_to_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index da95769..82ecb6c 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
- uaccess_ttbr0_enable x2, x3
+ uaccess_ttbr0_enable x2, x3, x4
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -72,7 +72,7 @@
isb
mov x0, #0
1:
- uaccess_ttbr0_disable x1
+ uaccess_ttbr0_disable x1, x2
ret
9:
mov x0, #-EFAULT
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 4c63cb1..d45dea9 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,7 +39,16 @@
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
-#define NUM_USER_ASIDS ASID_FIRST_VERSION
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
+#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
+#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
+#else
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
+#define asid2idx(asid) ((asid) & ~ASID_MASK)
+#define idx2asid(idx) asid2idx(idx)
+#endif
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
@@ -104,7 +113,7 @@
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
- __set_bit(asid & ~ASID_MASK, asid_map);
+ __set_bit(asid2idx(asid), asid_map);
per_cpu(reserved_asids, i) = asid;
}
@@ -159,16 +168,16 @@
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
- asid &= ~ASID_MASK;
- if (!__test_and_set_bit(asid, asid_map))
+ if (!__test_and_set_bit(asid2idx(asid), asid_map))
return newasid;
}
/*
* Allocate a free ASID. If we can't find one, take a note of the
- * currently active ASIDs and mark the TLBs as requiring flushes.
- * We always count from ASID #1, as we use ASID #0 when setting a
- * reserved TTBR0 for the init_mm.
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
*/
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
@@ -185,7 +194,7 @@
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
- return asid | generation;
+ return idx2asid(asid) | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
@@ -229,6 +238,15 @@
cpu_switch_mm(mm->pgd, mm);
}
+/* Errata workaround post TTBRx_EL1 update. */
+asmlinkage void post_ttbr_update_workaround(void)
+{
+ asm(ALTERNATIVE("nop; nop; nop",
+ "ic iallu; dsb nsh; isb",
+ ARM64_WORKAROUND_CAVIUM_27456,
+ CONFIG_CAVIUM_ERRATUM_27456));
+}
+
static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f8ef496..2b35b67 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -299,6 +299,7 @@
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -325,7 +326,6 @@
sparse_init();
zone_sizes_init(min, max);
- high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
memblock_dump_all();
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 33ecaff..31d16d8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -420,6 +420,37 @@
vm_area_add_early(vma);
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __init map_entry_trampoline(void)
+{
+ extern char __entry_tramp_text_start[];
+
+ pgprot_t prot = PAGE_KERNEL_EXEC;
+ phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
+
+ /* The trampoline is always mapped and can therefore be global */
+ pgprot_val(prot) &= ~PTE_NG;
+
+ /* Map only the text into the trampoline page table */
+ memset(tramp_pg_dir, 0, PGD_SIZE);
+ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
+ prot, pgd_pgtable_alloc, 0);
+
+ /* Map both the text and data into the kernel page table */
+ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern char __entry_tramp_data_start[];
+
+ __set_fixmap(FIX_ENTRY_TRAMP_DATA,
+ __pa_symbol(__entry_tramp_data_start),
+ PAGE_KERNEL_RO);
+ }
+
+ return 0;
+}
+core_initcall(map_entry_trampoline);
+#endif
+
/*
* Create fine-grained mappings for the kernel.
*/
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c2adb0c..89e3d95 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -132,12 +132,17 @@
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
+ mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
- bfi x0, x1, #48, #16 // set the ASID
- msr ttbr0_el1, x0 // set TTBR0
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ bfi x0, x1, #48, #16 // set the ASID field in TTBR0
+#endif
+ bfi x2, x1, #48, #16 // set the ASID
+ msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
isb
- post_ttbr0_update_workaround
- ret
+ msr ttbr0_el1, x0 // now update TTBR0
+ isb
+ b post_ttbr_update_workaround // Back to C code...
ENDPROC(cpu_do_switch_mm)
.pushsection ".idmap.text", "ax"
@@ -218,7 +223,7 @@
* both user and kernel.
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
- TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+ TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
tcr_set_idmap_t0sz x10, x9
/*
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index b41aff2..69711f2 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -100,12 +100,12 @@
* need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
* is enabled (it implies that hardware UAO and PAN disabled).
*/
- uaccess_ttbr0_enable x6, x7
+ uaccess_ttbr0_enable x6, x7, x8
hvc XEN_IMM
/*
* Disable userspace access from kernel once the hyp call completed.
*/
- uaccess_ttbr0_disable x6
+ uaccess_ttbr0_disable x6, x7
ret
ENDPROC(privcmd_call);
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 3c1bd64..88c4b77 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -319,11 +319,14 @@
config GPIO_ADI
def_bool y
+ depends on !PINCTRL
depends on (BF51x || BF52x || BF53x || BF538 || BF539 || BF561)
-config PINCTRL
+config PINCTRL_BLACKFIN_ADI2
def_bool y
- depends on BF54x || BF60x
+ depends on (BF54x || BF60x)
+ select PINCTRL
+ select PINCTRL_ADI2
config MEM_MT48LC64M4A2FB_7E
bool
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index f3337ee..a93cf06 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -17,6 +17,7 @@
config DEBUG_MMRS
tristate "Generate Blackfin MMR tree"
+ depends on !PINCTRL
select DEBUG_FS
help
Create a tree of Blackfin MMRs via the debugfs tree. If
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 87131cd..6d3a504 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -169,7 +169,7 @@
max_pfn = max_low_pfn = PFN_DOWN(_ramend);
high_memory = (void *)_ramend;
- m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6;
+ m68k_virt_to_node_shift = fls(_ramend - 1) - 6;
module_fixup(NULL, __start_fixup, __stop_fixup);
/* setup bootmem data */
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 3446b6f..9da4e22 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -576,7 +576,7 @@
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
- uart_port.flags = UPF_FIXED_TYPE;
+ uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
uart_port.regshift = 2;
uart_port.line = 0;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c558bce..6e716a5 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -683,6 +683,18 @@
struct task_struct *t;
int max_users;
+ /* If nothing to change, return right away, successfully. */
+ if (value == mips_get_process_fp_mode(task))
+ return 0;
+
+ /* Only accept a mode change if 64-bit FP enabled for o32. */
+ if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+ return -EOPNOTSUPP;
+
+ /* And only for o32 tasks. */
+ if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+ return -EOPNOTSUPP;
+
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 11890e6..0c8ae2c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -439,25 +439,38 @@
#endif /* CONFIG_64BIT */
-static int fpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
+ * correspond 1:1 to buffer slots. Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
{
- unsigned i;
- int err;
+ return user_regset_copyout(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots. Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ void **kbuf, void __user **ubuf)
+{
+ unsigned int i;
u64 fpr_val;
+ int err;
- /* XXX fcr31 */
-
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
-
+ BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
for (i = 0; i < NUM_FPU_REGS; i++) {
fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
- err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ err = user_regset_copyout(pos, count, kbuf, ubuf,
&fpr_val, i * sizeof(elf_fpreg_t),
(i + 1) * sizeof(elf_fpreg_t));
if (err)
@@ -467,27 +480,64 @@
return 0;
}
-static int fpr_set(struct task_struct *target,
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
+static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+ void *kbuf, void __user *ubuf)
{
- unsigned i;
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
int err;
+
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
+
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
+
+ return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
+ * context's general register slots. Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ return user_regset_copyin(pos, count, kbuf, ubuf,
+ &target->thread.fpu,
+ 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots. Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+ unsigned int *pos, unsigned int *count,
+ const void **kbuf, const void __user **ubuf)
+{
+ unsigned int i;
u64 fpr_val;
-
- /* XXX fcr31 */
-
- init_fp_ctx(target);
-
- if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu,
- 0, sizeof(elf_fpregset_t));
+ int err;
BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
- for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
- err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+ err = user_regset_copyin(pos, count, kbuf, ubuf,
&fpr_val, i * sizeof(elf_fpreg_t),
(i + 1) * sizeof(elf_fpreg_t));
if (err)
@@ -498,6 +548,53 @@
return 0;
}
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
+static int fpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ u32 fcr31;
+ int err;
+
+ BUG_ON(count % sizeof(elf_fpreg_t));
+
+ if (pos + count > sizeof(elf_fpregset_t))
+ return -EIO;
+
+ init_fp_ctx(target);
+
+ if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+ err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+ else
+ err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+ if (err)
+ return err;
+
+ if (count > 0) {
+ err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &fcr31,
+ fcr31_pos, fcr31_pos + sizeof(u32));
+ if (err)
+ return err;
+
+ ptrace_setfcr31(target, fcr31);
+ }
+
+ return err;
+}
+
enum mips_regset {
REGSET_GPR,
REGSET_FPR,
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9ade60c..7f2519c 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1781,7 +1781,7 @@
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
rv.s = ieee754sp_maddf(fd, fs, ft);
- break;
+ goto copcsr;
}
case fmsubf_op: {
@@ -1794,7 +1794,7 @@
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
rv.s = ieee754sp_msubf(fd, fs, ft);
- break;
+ goto copcsr;
}
case frint_op: {
@@ -1818,7 +1818,7 @@
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_2008class(fs);
rfmt = w_fmt;
- break;
+ goto copcsr;
}
case fmin_op: {
@@ -1830,7 +1830,7 @@
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmin(fs, ft);
- break;
+ goto copcsr;
}
case fmina_op: {
@@ -1842,7 +1842,7 @@
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmina(fs, ft);
- break;
+ goto copcsr;
}
case fmax_op: {
@@ -1854,7 +1854,7 @@
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmax(fs, ft);
- break;
+ goto copcsr;
}
case fmaxa_op: {
@@ -1866,7 +1866,7 @@
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmaxa(fs, ft);
- break;
+ goto copcsr;
}
case fabs_op:
@@ -2110,7 +2110,7 @@
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
rv.d = ieee754dp_maddf(fd, fs, ft);
- break;
+ goto copcsr;
}
case fmsubf_op: {
@@ -2123,7 +2123,7 @@
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
rv.d = ieee754dp_msubf(fd, fs, ft);
- break;
+ goto copcsr;
}
case frint_op: {
@@ -2147,7 +2147,7 @@
DPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754dp_2008class(fs);
rfmt = w_fmt;
- break;
+ goto copcsr;
}
case fmin_op: {
@@ -2159,7 +2159,7 @@
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmin(fs, ft);
- break;
+ goto copcsr;
}
case fmina_op: {
@@ -2171,7 +2171,7 @@
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmina(fs, ft);
- break;
+ goto copcsr;
}
case fmax_op: {
@@ -2183,7 +2183,7 @@
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmax(fs, ft);
- break;
+ goto copcsr;
}
case fmaxa_op: {
@@ -2195,7 +2195,7 @@
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmaxa(fs, ft);
- break;
+ goto copcsr;
}
case fabs_op:
diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c
index b9920b1..70cef54 100644
--- a/arch/mn10300/mm/misalignment.c
+++ b/arch/mn10300/mm/misalignment.c
@@ -437,7 +437,7 @@
info.si_signo = SIGSEGV;
info.si_errno = 0;
- info.si_code = 0;
+ info.si_code = SEGV_MAPERR;
info.si_addr = (void *) regs->pc;
force_sig_info(SIGSEGV, &info, current);
return;
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 140faa1..1311e6b 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -211,7 +211,7 @@
case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \
case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); \
+ case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
} while (0)
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 3d3f606..605a284 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -302,12 +302,12 @@
siginfo_t info;
if (user_mode(regs)) {
- /* Send a SIGSEGV */
- info.si_signo = SIGSEGV;
+ /* Send a SIGBUS */
+ info.si_signo = SIGBUS;
info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void *)address;
- force_sig_info(SIGSEGV, &info, current);
+ info.si_code = BUS_ADRALN;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, current);
} else {
printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
show_registers(regs);
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index 8be707e..82dea14 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -11,6 +11,7 @@
for the semaphore. */
#define __PA_LDCW_ALIGNMENT 16
+#define __PA_LDCW_ALIGN_ORDER 4
#define __ldcw_align(a) ({ \
unsigned long __ret = (unsigned long) &(a)->lock[0]; \
__ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \
@@ -28,6 +29,7 @@
ldcd). */
#define __PA_LDCW_ALIGNMENT 4
+#define __PA_LDCW_ALIGN_ORDER 2
#define __ldcw_align(a) (&(a)->slock)
#define __LDCW "ldcw,co"
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 4fcff2d..e3d3e8e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm/signal.h>
#include <asm/unistd.h>
+#include <asm/ldcw.h>
#include <asm/thread_info.h>
#include <linux/linkage.h>
@@ -46,6 +47,14 @@
#endif
.import pa_tlb_lock,data
+ .macro load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+ load32 PA(pa_tlb_lock), \reg
+#endif
+ .endm
/* space_to_prot macro creates a prot id from a space id */
@@ -457,7 +466,7 @@
.macro tlb_lock spc,ptp,pte,tmp,tmp1,fault
#ifdef CONFIG_SMP
cmpib,COND(=),n 0,\spc,2f
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
1: LDCW 0(\tmp),\tmp1
cmpib,COND(=) 0,\tmp1,1b
nop
@@ -480,7 +489,7 @@
/* Release pa_tlb_lock lock. */
.macro tlb_unlock1 spc,tmp
#ifdef CONFIG_SMP
- load32 PA(pa_tlb_lock),\tmp
+ load_pa_tlb_lock \tmp
tlb_unlock0 \spc,\tmp
#endif
.endm
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index adf7187..2d40c4f 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -36,6 +36,7 @@
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
+#include <asm/ldcw.h>
#include <linux/linkage.h>
.text
@@ -333,8 +334,12 @@
.macro tlb_lock la,flags,tmp
#ifdef CONFIG_SMP
- ldil L%pa_tlb_lock,%r1
- ldo R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+ load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+ depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+ load32 pa_tlb_lock, \la
+#endif
rsm PSW_SM_I,\flags
1: LDCW 0(\la),\tmp
cmpib,<>,n 0,\tmp,3f
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 7593787..c3a532a 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -39,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
@@ -181,6 +182,44 @@
}
/*
+ * Idle thread support
+ *
+ * Detect when running on QEMU with SeaBIOS PDC Firmware and let
+ * QEMU idle the host too.
+ */
+
+int running_on_qemu __read_mostly;
+
+void __cpuidle arch_cpu_idle_dead(void)
+{
+ /* nop on real hardware, qemu will offline CPU. */
+ asm volatile("or %%r31,%%r31,%%r31\n":::);
+}
+
+void __cpuidle arch_cpu_idle(void)
+{
+ local_irq_enable();
+
+ /* nop on real hardware, qemu will idle sleep. */
+ asm volatile("or %%r10,%%r10,%%r10\n":::);
+}
+
+static int __init parisc_idle_init(void)
+{
+ const char *marker;
+
+ /* check QEMU/SeaBIOS marker in PAGE0 */
+ marker = (char *) &PAGE0->pad0;
+ running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
+
+ if (!running_on_qemu)
+ cpu_idle_poll_ctrl(1);
+
+ return 0;
+}
+arch_initcall(parisc_idle_init);
+
+/*
* Copy architecture-specific thread state
*/
int
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6eda5ab..0a6bb48 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,6 +128,7 @@
select ARCH_HAS_GCOV_PROFILE_ALL
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
+ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 617dece..a60c9c6 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -72,8 +72,15 @@
MULTIPLEWORD := -mmultiple
endif
-cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2
+endif
+
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
ifneq ($(cc-name),clang)
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
endif
@@ -113,7 +120,9 @@
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
else
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
endif
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index f61cad3..4c935f7 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -201,6 +201,10 @@
unsigned long phys);
extern void hash__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
+
+int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__remove_section_mapping(unsigned long start, unsigned long end);
+
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 1e8fceb..430d038 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,17 +53,25 @@
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
}
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
__u8 proto, __wsum sum)
{
#ifdef __powerpc64__
- unsigned long s = (__force u32)sum;
+ u64 s = (__force u32)sum;
s += (__force u32)saddr;
s += (__force u32)daddr;
s += proto + len;
- s += (s >> 32);
- return (__force __wsum) s;
+ return (__force __wsum) from64to32(s);
#else
__asm__("\n\
addc %0,%0,%1 \n\
@@ -100,7 +108,7 @@
#ifdef __powerpc64__
res += (__force u64)addend;
- return (__force __wsum)((u32)res + (res >> 32));
+ return (__force __wsum) from64to32(res);
#else
asm("addc %0,%0,%1;"
"addze %0,%0;"
@@ -123,8 +131,7 @@
for (i = 0; i < ihl - 1; i++, ptr++)
s += *ptr;
- s += (s >> 32);
- return (__force __wsum)s;
+ return (__force __wsum)from64to32(s);
#else
__wsum sum, tmp;
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452..555e22d 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@
ori r3,r3,vector_offset@l; \
mtspr SPRN_IVOR##vector_number,r3;
+#define RFI_TO_KERNEL \
+ rfi
+
+#define RFI_TO_USER \
+ rfi
+
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9a3eee6..cab6d2a 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -51,6 +51,59 @@
#define EX_PPR 88 /* SMT thread status register (priority) */
#define EX_CTR 96
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT \
+ RFI_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define RFI_TO_KERNEL \
+ rfid
+
+#define RFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define HRFI_TO_KERNEL \
+ hrfid
+
+#define HRFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index ddf54f5..7b33234 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -189,4 +189,19 @@
void setup_feature_keys(void);
#endif
+#define RFI_FLUSH_FIXUP_SECTION \
+951: \
+ .pushsection __rfi_flush_fixup,"a"; \
+ .align 2; \
+952: \
+ FTR_ENTRY_OFFSET 951b-952b; \
+ .popsection;
+
+
+#ifndef __ASSEMBLY__
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
+#endif
+
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 708edeb..dc0996b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -240,6 +240,7 @@
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
@@ -306,7 +307,19 @@
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
+
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/**
* plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
@@ -433,6 +446,11 @@
}
#endif /* CONFIG_PPC_PSERIES */
+struct h_cpu_char_result {
+ u64 character;
+ u64 behaviour;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 6a6792b..ea43897 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -205,6 +205,16 @@
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * rfi fallback flush must be in its own cacheline to prevent
+ * other paca data leaking into the L1d
+ */
+ u64 exrfi[13] __aligned(0x80);
+ void *rfi_flush_fallback_area;
+ u64 l1d_flush_congruence;
+ u64 l1d_flush_sets;
+#endif
};
#ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 1b39424..4e53b85 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -340,4 +340,18 @@
return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
}
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+ if (rc == H_SUCCESS) {
+ p->character = retbuf[0];
+ p->behaviour = retbuf[1];
+ }
+
+ return rc;
+}
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 654d64c..6825a67 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -38,6 +38,19 @@
static inline void pseries_little_endian_exceptions(void) {}
#endif /* CONFIG_PPC_PSERIES */
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+ L1D_FLUSH_NONE = 0x1,
+ L1D_FLUSH_FALLBACK = 0x2,
+ L1D_FLUSH_ORI = 0x4,
+ L1D_FLUSH_MTTRIG = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c833d88..64bcbd5 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -240,6 +240,10 @@
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+ DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
+ DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
+ DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence));
+ DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets));
#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 7803756..9e05c88 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -97,6 +97,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
or r3, r3, r4
@@ -119,6 +120,7 @@
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index caa6596..c33b69d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -251,13 +251,23 @@
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+ /* exit to kernel */
1: ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
syscall_error:
@@ -859,7 +869,7 @@
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
-1:
+
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -872,8 +882,22 @@
ld r3,GPR3(r1)
ld r4,GPR4(r1)
ld r1,GPR1(r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- rfid
+1: mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3E */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fd68e19..96db6c3 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -655,6 +655,8 @@
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- 2f
+ andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */
+ bne 1f
/* All done -- return from exception. */
@@ -671,7 +673,23 @@
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+
+1:
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ RFI_TO_USER
b . /* prevent speculative execution */
2: mfspr r11,SPRN_SRR0
@@ -679,7 +697,7 @@
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
8: mfspr r11,SPRN_SRR0
@@ -1576,6 +1594,92 @@
bl kernel_bad_stack
b 1b
+ .globl rfi_flush_fallback
+rfi_flush_fallback:
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ rfid
+
+ .globl hrfi_flush_fallback
+hrfi_flush_fallback:
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
/*
* Called from arch_local_irq_enable when an interrupt needs
* to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a12be60..7c30a91 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -37,6 +37,7 @@
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <linux/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
@@ -678,4 +679,142 @@
return 0;
}
early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+ pr_info("rfi-flush: disabled on command line.");
+ no_rfi_flush = true;
+ return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+void rfi_flush_enable(bool enable)
+{
+ if (rfi_flush == enable)
+ return;
+
+ if (enable) {
+ do_rfi_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+ rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+ u64 l1d_size, limit;
+ int cpu;
+
+ l1d_size = ppc64_caches.dsize;
+ limit = min(safe_stack_limit(), ppc64_rma_size);
+
+ /*
+ * Align to L1d size, and size it at 2x L1d size, to catch possible
+ * hardware prefetch runoff. We don't have a recipe for load patterns to
+ * reliably avoid the prefetcher.
+ */
+ l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+ memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * The fallback flush is currently coded for 8-way
+ * associativity. Different associativity is possible, but it
+ * will be treated as 8-way and may not evict the lines as
+ * effectively.
+ *
+ * 128 byte lines are mandatory.
+ */
+ u64 c = l1d_size / 8;
+
+ paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca[cpu].l1d_flush_congruence = c;
+ paca[cpu].l1d_flush_sets = c / 128;
+ }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+ if (types & L1D_FLUSH_FALLBACK) {
+ pr_info("rfi-flush: Using fallback displacement flush\n");
+ init_fallback_flush();
+ }
+
+ if (types & L1D_FLUSH_ORI)
+ pr_info("rfi-flush: Using ori type flush\n");
+
+ if (types & L1D_FLUSH_MTTRIG)
+ pr_info("rfi-flush: Using mttrig type flush\n");
+
+ enabled_flush_types = types;
+
+ if (!no_rfi_flush)
+ rfi_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+ if (val == 1)
+ rfi_flush_enable(true);
+ else if (val == 0)
+ rfi_flush_enable(false);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+ *val = rfi_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (rfi_flush)
+ return sprintf(buf, "Mitigation: RFI Flush\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7394b77..b61fb79 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@
/* Read-only data */
RODATA
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+ __start___rfi_flush_fixup = .;
+ *(__rfi_flush_fixup)
+ __stop___rfi_flush_fixup = .;
+ }
+#endif
+
EXCEPTION_TABLE(0)
NOTES :kernel :notes
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 043415f..e86bfa1 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/firmware.h>
+#include <asm/setup.h>
struct fixup_entry {
unsigned long mask;
@@ -115,6 +116,47 @@
}
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup),
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = 0x48000010;
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 78dabf06..bd66628 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -747,7 +747,7 @@
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -761,7 +761,7 @@
return rc;
}
-int remove_section_mapping(unsigned long start, unsigned long end)
+int hash__remove_section_mapping(unsigned long start, unsigned long end)
{
int rc = htab_remove_mapping(start, end, mmu_linear_psize,
mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f4f437c..0fad7f6 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -125,3 +125,21 @@
else if (mmu_hash_ops.hpte_clear_all)
mmu_hash_ops.hpte_clear_all();
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int create_section_mapping(unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ return -ENODEV;
+
+ return hash__create_section_mapping(start, end);
+}
+
+int remove_section_mapping(unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ return -ENODEV;
+
+ return hash__remove_section_mapping(start, end);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 9a25dce..44c33ee 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -173,6 +173,10 @@
*/
register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
static void __init radix_init_partition_table(void)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 72c27b8..083f9274 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -401,8 +401,12 @@
int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
pagefault_disable();
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 7b2ca16..991c6a5 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -516,7 +516,7 @@
{
if (s1 < s2)
return 1;
- if (s2 > s1)
+ if (s1 > s2)
return -1;
return memcmp(d1, d2, s1);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 83bebee..0f7b16e 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -39,18 +39,18 @@
int token;
spin_lock_irqsave(&opal_async_comp_lock, flags);
- token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
+ token = find_first_zero_bit(opal_async_token_map, opal_max_async_tokens);
if (token >= opal_max_async_tokens) {
token = -EBUSY;
goto out;
}
- if (__test_and_set_bit(token, opal_async_token_map)) {
+ if (!__test_and_clear_bit(token, opal_async_complete_map)) {
token = -EBUSY;
goto out;
}
- __clear_bit(token, opal_async_complete_map);
+ __set_bit(token, opal_async_token_map);
out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index dcdfee0..f602307 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2623,6 +2623,9 @@
level_shift = entries_shift + 3;
level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ if ((level_shift - 3) * levels + page_shift >= 60)
+ return -EINVAL;
+
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
levels, tce_table_size, &offset, &total_allocated);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index efe8b6b..6f8b4c1 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -35,13 +35,63 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/tm.h>
+#include <asm/setup.h>
#include "powernv.h"
+static void pnv_setup_rfi_flush(void)
+{
+ struct device_node *np, *fw_features;
+ enum l1d_flush_type type;
+ int enable;
+
+ /* Default to fallback in case fw-features are not available */
+ type = L1D_FLUSH_FALLBACK;
+ enable = 1;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+
+ if (fw_features) {
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_MTTRIG;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_ORI;
+
+ of_node_put(np);
+
+ /* Enable unless firmware says NOT to */
+ enable = 2;
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+ of_node_put(fw_features);
+ }
+
+ setup_rfi_flush(type, enable > 0);
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+ pnv_setup_rfi_flush();
+
/* Initialize SMP */
pnv_smp_init();
@@ -289,7 +339,7 @@
{
unsigned long ret_freq;
- ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+ ret_freq = cpufreq_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 97aa3f3..1845fc6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -450,6 +450,39 @@
of_pci_check_probe_only();
}
+static void pseries_setup_rfi_flush(void)
+{
+ struct h_cpu_char_result result;
+ enum l1d_flush_type types;
+ bool enable;
+ long rc;
+
+ /* Enable by default */
+ enable = true;
+
+ rc = plpar_get_cpu_characteristics(&result);
+ if (rc == H_SUCCESS) {
+ types = L1D_FLUSH_NONE;
+
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ types |= L1D_FLUSH_MTTRIG;
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ types |= L1D_FLUSH_ORI;
+
+ /* Use fallback if nothing set in hcall */
+ if (types == L1D_FLUSH_NONE)
+ types = L1D_FLUSH_FALLBACK;
+
+ if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ enable = false;
+ } else {
+ /* Default to fallback if case hcall is not available */
+ types = L1D_FLUSH_FALLBACK;
+ }
+
+ setup_rfi_flush(types, enable);
+}
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -467,6 +500,8 @@
fwnmi_init();
+ pseries_setup_rfi_flush();
+
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index ada29ea..f523ac8 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -274,7 +274,9 @@
if (bank->disk->major > 0)
unregister_blkdev(bank->disk->major,
bank->disk->disk_name);
- del_gendisk(bank->disk);
+ if (bank->disk->flags & GENHD_FL_UP)
+ del_gendisk(bank->disk);
+ put_disk(bank->disk);
}
device->dev.platform_data = NULL;
if (bank->io_addr != 0)
@@ -299,6 +301,7 @@
device_remove_file(&device->dev, &dev_attr_ecc);
free_irq(bank->irq_id, device);
del_gendisk(bank->disk);
+ put_disk(bank->disk);
iounmap((void __iomem *) bank->io_addr);
kfree(bank);
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index f267ee0..716353b 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -845,12 +845,12 @@
u32 ipic_get_mcp_status(void)
{
- return ipic_read(primary_ipic->regs, IPIC_SERMR);
+ return ipic_read(primary_ipic->regs, IPIC_SERSR);
}
void ipic_clear_mcp_status(u32 mask)
{
- ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
+ ipic_write(primary_ipic->regs, IPIC_SERSR, mask);
}
/* Return an interrupt vector or 0 if no interrupt is pending. */
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 649eb62..9e02cb7 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -81,6 +81,6 @@
int zpci_load(u64 *data, u64 req, u64 offset);
int zpci_store(u64 data, u64 req, u64 offset);
int zpci_store_block(const u64 *data, u64 req, u64 offset);
-void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
#endif
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
index 402ad6d..c54a931 100644
--- a/arch/s390/include/asm/runtime_instr.h
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -85,6 +85,8 @@
load_runtime_instr_cb(&runtime_instr_empty_cb);
}
-void exit_thread_runtime_instr(void);
+struct task_struct;
+
+void runtime_instr_release(struct task_struct *tsk);
#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index dde6b52..ff2fbda 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -29,17 +29,16 @@
}
#define switch_to(prev,next,last) do { \
- if (prev->mm) { \
- save_fpu_regs(); \
- save_access_regs(&prev->thread.acrs[0]); \
- save_ri_cb(prev->thread.ri_cb); \
- } \
+ /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
+ * a restore of the floating point / vector registers as \
+ * soon as the next task returns to user space \
+ */ \
+ save_fpu_regs(); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
update_cr_regs(next); \
- if (next->mm) { \
- set_cpu_flag(CIF_FPU); \
- restore_access_regs(&next->thread.acrs[0]); \
- restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- } \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 0f9cd90..f06a9a0 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 172fe11..8382fc6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -70,8 +70,6 @@
*/
void exit_thread(struct task_struct *tsk)
{
- if (tsk == current)
- exit_thread_runtime_instr();
}
void flush_thread(void)
@@ -84,6 +82,7 @@
void arch_release_task_struct(struct task_struct *tsk)
{
+ runtime_instr_release(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 70cdb03..fd03a75 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,11 +18,24 @@
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
+void runtime_instr_release(struct task_struct *tsk)
+{
+ kfree(tsk->thread.ri_cb);
+}
+
static void disable_runtime_instr(void)
{
- struct pt_regs *regs = task_pt_regs(current);
+ struct task_struct *task = current;
+ struct pt_regs *regs;
+ if (!task->thread.ri_cb)
+ return;
+ regs = task_pt_regs(task);
+ preempt_disable();
load_runtime_instr_cb(&runtime_instr_empty_cb);
+ kfree(task->thread.ri_cb);
+ task->thread.ri_cb = NULL;
+ preempt_enable();
/*
* Make sure the RI bit is deleted from the PSW. If the user did not
@@ -43,19 +56,6 @@
cb->valid = 1;
}
-void exit_thread_runtime_instr(void)
-{
- struct task_struct *task = current;
-
- preempt_disable();
- if (!task->thread.ri_cb)
- return;
- disable_runtime_instr();
- kfree(task->thread.ri_cb);
- task->thread.ri_cb = NULL;
- preempt_enable();
-}
-
SYSCALL_DEFINE1(s390_runtime_instr, int, command)
{
struct runtime_instr_cb *cb;
@@ -64,7 +64,7 @@
return -EOPNOTSUPP;
if (command == S390_RUNTIME_INSTR_STOP) {
- exit_thread_runtime_instr();
+ disable_runtime_instr();
return 0;
}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 9b59e62..709da45 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -369,10 +369,10 @@
SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
SYSCALL(sys_socket,sys_socket)
SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
SYSCALL(sys_setsockopt,compat_sys_setsockopt)
SYSCALL(sys_getsockname,compat_sys_getsockname)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index e184353..c2905a1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,8 +197,6 @@
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return -EAGAIN;
}
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
return 0;
}
@@ -209,6 +207,9 @@
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -238,6 +239,9 @@
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -273,6 +277,9 @@
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 15ffc19..03a1d59 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -354,7 +354,8 @@
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC))
+ break;
si = 0;
continue;
}
@@ -928,7 +929,7 @@
if (!s390_pci_probe)
return 0;
- if (!test_facility(69) || !test_facility(71) || !test_facility(72))
+ if (!test_facility(69) || !test_facility(71))
return 0;
rc = zpci_debug_init();
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index fa8d7d4..248146d 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/delay.h>
+#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
#include <asm/processor.h>
@@ -91,11 +92,14 @@
}
/* Set Interruption Controls */
-void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
{
+ if (!test_facility(72))
+ return -EIO;
asm volatile (
" .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+ return 0;
}
/* PCI Load */
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index ff63934..c5b9977 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -607,7 +607,8 @@
break;
}
- force_sig_info(SIGFPE, &info, current);
+ info.si_signo = SIGFPE;
+ force_sig_info(info.si_signo, &info, current);
}
#endif
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 57154c6..0f183ff 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2391,10 +2391,17 @@
{
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
- register_page_bootmem_info();
free_all_bootmem();
/*
+ * Must be done after boot memory is put on freelist, because here we
+ * might set fields in deferred struct pages that have not yet been
+ * initialized, and free_all_bootmem() initializes all the reserved
+ * deferred pages for us.
+ */
+ register_page_bootmem_info();
+
+ /*
* Set up the zero page, mark it reserved, so that page count
* is not manipulated when freeing the page from user ptes.
*/
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index c7f2a52..83a73cf 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -54,6 +54,7 @@
enum mbus_module srmmu_modtype;
static unsigned int hwbug_bitmask;
int vac_cache_size;
+EXPORT_SYMBOL(vac_cache_size);
int vac_line_size;
extern struct resource sparc_iomap;
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 0ca46ede..9c150cc 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -117,7 +117,7 @@
archprepare: include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
$(call cc-option, -fno-stack-protector,) \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b9c546a..0ca4d12 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,7 +45,7 @@
select ARCH_USE_CMPXCHG_LOCKREF if X86_64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_IPC_PARSE_VERSION if X86_32
@@ -64,6 +64,7 @@
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
@@ -407,6 +408,19 @@
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ ---help---
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
if X86_32
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index ad1abc1..ea24114 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -203,6 +203,14 @@
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 766a521..2728e1b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -9,6 +9,7 @@
*/
#undef CONFIG_PARAVIRT
#undef CONFIG_PARAVIRT_SPINLOCKS
+#undef CONFIG_PAGE_TABLE_ISOLATION
#undef CONFIG_KASAN
#include <linux/linkage.h>
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 383a6f8..fa8801b 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2734,7 +2735,7 @@
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2779,7 +2780,7 @@
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ CALL_NOSPEC %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index aa8b067..d9ae404 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -906,7 +906,7 @@
if (sg_is_last(req->src) &&
req->src->offset + req->src->length <= PAGE_SIZE &&
- sg_is_last(req->dst) &&
++ sg_is_last(req->dst) && req->dst->length &&
req->dst->offset + req->dst->length <= PAGE_SIZE) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index aa9e8bd..77ff4de 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1224,7 +1225,7 @@
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 16186c1..7384342 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1337,7 +1338,7 @@
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ CALL_NOSPEC %r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index dc05f01..174fd41 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -172,7 +173,7 @@
movzxw (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
- jmp *bufp
+ JMP_NOSPEC bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index e32142b..28c3720 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -164,7 +164,6 @@
.init = poly1305_simd_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
- .setkey = crypto_poly1305_setkey,
.descsize = sizeof(struct poly1305_simd_desc_ctx),
.base = {
.cra_name = "poly1305",
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d..cb91a64 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
walk.dst.virt.addr,
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
index 36870b2..d088050 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -57,10 +57,12 @@
{
unsigned int j;
- state->lens[0] = 0;
- state->lens[1] = 1;
- state->lens[2] = 2;
- state->lens[3] = 3;
+ /* initially all lanes are unused */
+ state->lens[0] = 0xFFFFFFFF00000000;
+ state->lens[1] = 0xFFFFFFFF00000001;
+ state->lens[2] = 0xFFFFFFFF00000002;
+ state->lens[3] = 0xFFFFFFFF00000003;
+
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdd9cc5..b0cd306 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -20,6 +20,7 @@
#include <linux/export.h>
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
+#include <linux/nospec.h>
#include <linux/uprobes.h>
#include <asm/desc.h>
@@ -201,7 +202,7 @@
* special case only applies after poking regs and before the
* very next return to user mode.
*/
- current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
@@ -277,7 +278,8 @@
* regs->orig_ax, which changes the behavior of some syscalls.
*/
if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
- regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
+ regs->ax = sys_call_table[nr](
regs->di, regs->si, regs->dx,
regs->r10, regs->r8, regs->r9);
}
@@ -299,7 +301,7 @@
unsigned int nr = (unsigned int)regs->orig_ax;
#ifdef CONFIG_IA32_EMULATION
- current->thread.status |= TS_COMPAT;
+ ti->status |= TS_COMPAT;
#endif
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
@@ -313,6 +315,7 @@
}
if (likely(nr < IA32_NR_syscalls)) {
+ nr = array_index_nospec(nr, IA32_NR_syscalls);
/*
* It's possible that a 32-bit syscall implementation
* takes a 64-bit parameter but nonetheless assumes that
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index edba860..f5434b4 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,6 +45,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -228,6 +229,18 @@
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ /* Clobbers %ebx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl %esi
popl %edi
@@ -260,7 +273,7 @@
/* kernel thread */
1: movl %edi, %eax
- call *%ebx
+ CALL_NOSPEC %ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -984,7 +997,8 @@
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ CALL_NOSPEC %ecx
popl %edx
popl %ecx
@@ -1020,7 +1034,7 @@
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ JMP_NOSPEC %ecx
#endif
#ifdef CONFIG_TRACING
@@ -1062,7 +1076,7 @@
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ CALL_NOSPEC %edi
jmp ret_from_exception
END(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index be9df51..8be2aaa 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,8 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/export.h>
+#include <asm/kaiser.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -54,19 +56,15 @@
ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_FLAGS flags:req
+.macro TRACE_IRQS_IRETQ
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ bt $9, EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
#endif
.endm
-.macro TRACE_IRQS_IRETQ
- TRACE_IRQS_FLAGS EFLAGS(%rsp)
-.endm
-
/*
* When dynamic function tracer is enabled it will add a breakpoint
* to all locations that it is about to modify, sync CPUs, update
@@ -150,6 +148,7 @@
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
@@ -178,83 +177,17 @@
pushq %r9 /* pt_regs->r9 */
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
- sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ pushq %rbx /* pt_regs->rbx */
+ pushq %rbp /* pt_regs->rbp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
- /*
- * If we need to do entry work or if we guess we'll need to do
- * exit work, go straight to the slow path.
- */
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz entry_SYSCALL64_slow_path
-
-entry_SYSCALL_64_fastpath:
- /*
- * Easy case: enable interrupts and issue the syscall. If the syscall
- * needs pt_regs, we'll call a stub that disables interrupts again
- * and jumps to the slow path.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx
-
- /*
- * This call instruction is handled specially in stub_ptregs_64.
- * It might end up jumping to the slow path. If it jumps, RAX
- * and all argument registers are clobbered.
- */
- call *sys_call_table(, %rax, 8)
-.Lentry_SYSCALL_64_after_fastpath_call:
-
- movq %rax, RAX(%rsp)
-1:
-
- /*
- * If we get here, then we know that pt_regs is clean for SYSRET64.
- * If we see that no exit work is required (which we are required
- * to check with IRQs off), then we can go straight to SYSRET64.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- movq PER_CPU_VAR(current_task), %r11
- testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
- jnz 1f
-
- LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON /* user mode is traced as IRQs on */
- movq RIP(%rsp), %rcx
- movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
- USERGS_SYSRET64
-
-1:
- /*
- * The fast path looked good when we started, but something changed
- * along the way and we need to switch to the slow path. Calling
- * raise(3) will trigger this, for example. IRQs are off.
- */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_EXTRA_REGS
- movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
- jmp return_from_SYSCALL_64
-
-entry_SYSCALL64_slow_path:
/* IRQs are off. */
- SAVE_EXTRA_REGS
movq %rsp, %rdi
call do_syscall_64 /* returns with IRQs disabled */
-return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -327,53 +260,31 @@
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
+
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
movq RSP(%rsp), %rsp
USERGS_SYSRET64
opportunistic_sysret_failed:
+ /*
+ * This opens a window where we have a user CR3, but are
+ * running in the kernel. This makes using the CS
+ * register useless for telling whether or not we need to
+ * switch CR3 in NMIs. Normal interrupts are OK because
+ * they are off here.
+ */
+ SWITCH_USER_CR3
SWAPGS
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
-ENTRY(stub_ptregs_64)
- /*
- * Syscalls marked as needing ptregs land here.
- * If we are on the fast path, we need to save the extra regs,
- * which we achieve by trying again on the slow path. If we are on
- * the slow path, the extra regs are already saved.
- *
- * RAX stores a pointer to the C function implementing the syscall.
- * IRQs are on.
- */
- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
- jne 1f
-
- /*
- * Called from fast path -- disable IRQs again, pop return address
- * and jump to slow path
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- popq %rax
- jmp entry_SYSCALL64_slow_path
-
-1:
- jmp *%rax /* Called from C */
-END(stub_ptregs_64)
-
-.macro ptregs_stub func
-ENTRY(ptregs_\func)
- leaq \func(%rip), %rax
- jmp stub_ptregs_64
-END(ptregs_\func)
-.endm
-
-/* Instantiate ptregs_stub for each ptregs-using syscall */
-#define __SYSCALL_64_QUAL_(sym)
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
-#include <asm/syscalls_64.h>
-
/*
* %rdi: prev task
* %rsi: next task
@@ -399,6 +310,18 @@
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ /* Clobbers %rbx */
+ FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq %r15
popq %r14
@@ -428,13 +351,14 @@
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
1:
/* kernel thread */
movq %r12, %rdi
- call *%rbx
+ CALL_NOSPEC %rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -482,6 +406,7 @@
* tracking that we're in kernel mode.
*/
SWAPGS
+ SWITCH_KERNEL_CR3
/*
* We need to tell lockdep that IRQs are off. We can't do this until
@@ -539,6 +464,7 @@
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
@@ -616,6 +542,7 @@
pushq %rdi /* Stash user RDI */
SWAPGS
+ SWITCH_KERNEL_CR3
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@@ -642,6 +569,7 @@
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
+ SWITCH_USER_CR3
SWAPGS
movq %rax, %rsp
@@ -686,13 +614,8 @@
#endif
/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-# define POP_SECTION_IRQENTRY .popsection
-#else
-# define PUSH_SECTION_IRQENTRY
-# define POP_SECTION_IRQENTRY
-#endif
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection
.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
@@ -872,13 +795,11 @@
ENTRY(native_load_gs_index)
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
- TRACE_IRQS_OFF
SWAPGS
.Lgs_change:
movl %edi, %gs
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
- TRACE_IRQS_FLAGS (%rsp)
popfq
ret
END(native_load_gs_index)
@@ -1022,13 +943,17 @@
#endif
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
* Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ *
+ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
*/
ENTRY(paranoid_entry)
cld
@@ -1041,7 +966,26 @@
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
-1: ret
+1:
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax
+ jz 2f
+ orl $2, %ebx
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3
+2:
+#endif
+ ret
END(paranoid_entry)
/*
@@ -1054,19 +998,26 @@
* be complicated. Fortunately, we there's no good reason
* to try to handle preemption here.
*
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
+ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
+ * ebx=2: needs both swapgs and SWITCH_USER_CR3
+ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
*/
ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF_DEBUG
- testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
- TRACE_IRQS_IRETQ
- SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+paranoid_exit_no_switch:
+#endif
+ testl $1, %ebx /* swapgs needed? */
+ jnz paranoid_exit_no_swapgs
+ SWAPGS_UNSAFE_STACK
+paranoid_exit_no_swapgs:
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1081,6 +1032,13 @@
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ /*
+ * error_entry() always returns with a kernel gsbase and
+ * CR3. We must also have a kernel CR3/gsbase before
+ * calling TRACE_IRQS_*. Just unconditionally switch to
+ * the kernel CR3 here.
+ */
+ SWITCH_KERNEL_CR3
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1241,6 +1199,10 @@
*/
SWAPGS_UNSAFE_STACK
+ /*
+ * percpu variables are mapped with user CR3, so no need
+ * to switch CR3 here.
+ */
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1274,12 +1236,34 @@
movq %rsp, %rdi
movq $-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
+2:
+#endif
call do_nmi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
/*
* Return back to user mode. We must *not* do the normal exit
- * work, because we don't want to enable interrupts. Fortunately,
- * do_nmi doesn't modify pt_regs.
+ * work, because we don't want to enable interrupts. Do not
+ * switch to user CR3: we might be going back to kernel code
+ * that had a user CR3 set.
*/
SWAPGS
jmp restore_c_regs_and_iret
@@ -1476,22 +1460,55 @@
ALLOC_PT_GPREGS_ON_STACK
/*
- * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
- * as we should not be calling schedule in NMI context.
- * Even with normal interrupts enabled. An NMI should not be
- * setting NEED_RESCHED or anything that normal interrupts and
- * exceptions might do.
+ * Use the same approach as paranoid_entry to handle SWAPGS, but
+ * without CR3 handling since we do that differently in NMIs. No
+ * need to use paranoid_exit as we should not be calling schedule
+ * in NMI context. Even with normal interrupts enabled. An NMI
+ * should not be setting NEED_RESCHED or anything that normal
+ * interrupts and exceptions might do.
*/
- call paranoid_entry
-
- /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ cld
+ SAVE_C_REGS
+ SAVE_EXTRA_REGS
+ movl $1, %ebx
+ movl $MSR_GS_BASE, %ecx
+ rdmsr
+ testl %edx, %edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx, %ebx
+1:
movq %rsp, %rdi
movq $-1, %rsi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
+2:
+#endif
+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
call do_nmi
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+#endif
+
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
+ /* We fixed up CR3 above, so no need to switch it here */
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_EXTRA_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721da..d76a976 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,8 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -48,6 +50,7 @@
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@@ -184,6 +187,7 @@
ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/* Stash user ESP and switch to the kernel stack. */
movl %esp, %r8d
@@ -259,6 +263,7 @@
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
+ SWITCH_USER_CR3
movq RSP-ORIG_RAX(%rsp), %rsp
swapgs
sysretl
@@ -297,7 +302,7 @@
PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
-
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* User tracing code (ptrace or signal handlers) might assume that
* the saved RAX contains a 32-bit number when we're invoking a 32-bit
@@ -338,6 +343,7 @@
/* Go back to user mode. */
TRACE_IRQS_ON
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
END(entry_INT80_compat)
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 9dbc5ab..6705edd 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,14 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_64_QUAL_(sym) sym
-#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
+#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 636c4b3..0174290 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -46,6 +46,7 @@
#else
EMULATE;
#endif
+unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
static int __init vsyscall_setup(char *str)
{
@@ -66,6 +67,11 @@
}
early_param("vsyscall", vsyscall_setup);
+bool vsyscall_enabled(void)
+{
+ return vsyscall_mode != NONE;
+}
+
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
const char *message)
{
@@ -331,11 +337,11 @@
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+ if (vsyscall_mode != NATIVE)
+ vsyscall_pgprot = __PAGE_KERNEL_VVAR;
if (vsyscall_mode != NONE)
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ __pgprot(vsyscall_pgprot));
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
index 9842270..21a4e41 100644
--- a/arch/x86/events/amd/power.c
+++ b/arch/x86/events/amd/power.c
@@ -277,7 +277,7 @@
int ret;
if (!x86_match_cpu(cpu_match))
- return 0;
+ return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
return -ENODEV;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 982c9e3..21298c1 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -22,6 +22,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>
+#include <linux/kaiser.h>
#include <asm-generic/sizes.h>
#include <asm/perf_event.h>
@@ -77,6 +78,23 @@
return 1 << (PAGE_SHIFT + page_private(page));
}
+static void bts_buffer_free_aux(void *data)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ struct bts_buffer *buf = data;
+ int nbuf;
+
+ for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) {
+ struct page *page = buf->buf[nbuf].page;
+ void *kaddr = page_address(page);
+ size_t page_size = buf_size(page);
+
+ kaiser_remove_mapping((unsigned long)kaddr, page_size);
+ }
+#endif
+ kfree(data);
+}
+
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
@@ -113,29 +131,33 @@
buf->real_size = size - size % BTS_RECORD_SIZE;
for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
- unsigned int __nr_pages;
+ void *kaddr = pages[pg];
+ size_t page_size;
- page = virt_to_page(pages[pg]);
- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+ page = virt_to_page(kaddr);
+ page_size = buf_size(page);
+
+ if (kaiser_add_mapping((unsigned long)kaddr,
+ page_size, __PAGE_KERNEL) < 0) {
+ buf->nr_bufs = nbuf;
+ bts_buffer_free_aux(buf);
+ return NULL;
+ }
+
buf->buf[nbuf].page = page;
buf->buf[nbuf].offset = offset;
buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
- buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+ buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement;
pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
buf->buf[nbuf].size -= pad;
- pg += __nr_pages;
- offset += __nr_pages << PAGE_SHIFT;
+ pg += page_size >> PAGE_SHIFT;
+ offset += page_size;
}
return buf;
}
-static void bts_buffer_free_aux(void *data)
-{
- kfree(data);
-}
-
static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
return buf->buf[idx].offset + buf->buf[idx].displacement;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be20239..8e7a3f1 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2,11 +2,15 @@
#include <linux/types.h>
#include <linux/slab.h>
+#include <asm/kaiser.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "../perf_event.h"
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
@@ -268,6 +272,39 @@
static DEFINE_PER_CPU(void *, insn_buffer);
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ unsigned int order = get_order(size);
+ struct page *page;
+ unsigned long addr;
+
+ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+ if (!page)
+ return NULL;
+ addr = (unsigned long)page_address(page);
+ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
+ __free_pages(page, order);
+ addr = 0;
+ }
+ return (void *)addr;
+#else
+ return kmalloc_node(size, flags | __GFP_ZERO, node);
+#endif
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!buffer)
+ return;
+ kaiser_remove_mapping((unsigned long)buffer, size);
+ free_pages((unsigned long)buffer, get_order(size));
+#else
+ kfree(buffer);
+#endif
+}
+
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -278,7 +315,7 @@
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -289,7 +326,7 @@
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
- kfree(buffer);
+ dsfree(buffer, x86_pmu.pebs_buffer_size);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
@@ -315,7 +352,8 @@
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+ x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
}
@@ -329,7 +367,7 @@
if (!x86_pmu.bts)
return 0;
- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
@@ -355,19 +393,15 @@
if (!ds || !x86_pmu.bts)
return;
- kfree((void *)(unsigned long)ds->bts_buffer_base);
+ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
}
static int alloc_ds_buffer(int cpu)
{
- int node = cpu_to_node(cpu);
- struct debug_store *ds;
+ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
- if (unlikely(!ds))
- return -ENOMEM;
-
+ memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
@@ -381,7 +415,6 @@
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
- kfree(ds);
}
void release_ds_buffers(void)
@@ -1389,9 +1422,13 @@
continue;
/* log dropped samples number */
- if (error[bit])
+ if (error[bit]) {
perf_log_lost_samples(event, error[bit]);
+ if (perf_event_account_interrupt(event))
+ x86_pmu_stop(event, 0);
+ }
+
if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base,
top, bit, counts[bit]);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ccbe24e..6665000 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -139,7 +139,7 @@
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
- ".popsection"
+ ".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
OLDINSTR_2(oldinstr, 1, 2) \
@@ -150,7 +150,7 @@
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
- ".popsection"
+ ".popsection\n"
/*
* Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 44b8762..1666542 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -10,7 +10,34 @@
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
+#include <asm/asm.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
#endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb(void);
+asmlinkage void __clear_rsb(void);
+
+#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index be70795a..08684b3 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
# define __ASM_FORM_COMMA(x) " " #x ","
#endif
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
# define __ASM_SEL(a,b) __ASM_FORM(a)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
+/* 64 bit */
# define __ASM_SEL(a,b) __ASM_FORM(b)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
@@ -133,8 +135,8 @@
* gets set up by the containing function. If you forget to do this, objtool
* may print a "call without frame pointer save/setup" warning.
*/
-register unsigned int __asm_call_sp asm("esp");
-#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index bfb28ca..8575903 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -23,6 +23,34 @@
#define wmb() asm volatile("sfence" ::: "memory")
#endif
+/**
+ * array_index_mask_nospec() - generate a mask that is ~0UL when the
+ * bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ *
+ * Returns:
+ * 0 - (index < size)
+ */
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ asm ("cmp %1,%2; sbb %0,%0;"
+ :"=r" (mask)
+ :"r"(size),"r" (index)
+ :"cc");
+ return mask;
+}
+
+/* Override the default implementation from linux/nospec.h. */
+#define array_index_mask_nospec array_index_mask_nospec
+
+/* Prevent speculative execution past this barrier. */
+#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
+ "lfence", X86_FEATURE_LFENCE_RDTSC)
+
#ifdef CONFIG_X86_PPRO_FENCE
#define dma_rmb() rmb()
#else
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7..84ae170 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
#define _ASM_X86_CMDLINE_H
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+ char *buffer, int bufsize);
#endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d2b69f..8c10157 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -28,6 +28,7 @@
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@@ -78,8 +79,9 @@
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -100,8 +102,9 @@
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
@@ -135,6 +138,8 @@
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ed10b5b..8eb23f5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -189,13 +189,20 @@
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+
+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -228,6 +235,7 @@
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -252,6 +260,9 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
+#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -287,6 +298,13 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
/*
* BUG word(s)
*/
@@ -312,5 +330,8 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d8..2ed5a2b 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -43,7 +43,7 @@
struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 85599ad..1f8cca4 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
+# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -43,7 +45,7 @@
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 0
+#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
@@ -57,6 +59,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 59405a2..9b76cd3 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,8 +22,8 @@
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
- unsigned int irq_tlb_count;
#endif
+ unsigned int irq_tlb_count;
#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b90e105..0817d63 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -178,7 +178,7 @@
#define VECTOR_RETRIGGERED ((void *)~0UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
-DECLARE_PER_CPU(vector_irq_t, vector_irq);
+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
#endif /* !ASSEMBLY_ */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 34a46dc..75b748a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -12,6 +12,7 @@
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
+
#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_FAM6_CORE2_PENRYN 0x17
@@ -21,6 +22,7 @@
#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_FAM6_NEHALEM_EX 0x2E
+
#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_FAM6_WESTMERE_EP 0x2C
#define INTEL_FAM6_WESTMERE_EX 0x2F
@@ -36,9 +38,9 @@
#define INTEL_FAM6_HASWELL_GT3E 0x46
#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_BROADWELL_GT3E 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
+#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
@@ -57,9 +59,10 @@
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
+#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
/* Xeon Phi */
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
new file mode 100644
index 0000000..802bbbd
--- /dev/null
+++ b/arch/x86/include/asm/kaiser.h
@@ -0,0 +1,141 @@
+#ifndef _ASM_X86_KAISER_H
+#define _ASM_X86_KAISER_H
+
+#include <uapi/asm/processor-flags.h> /* For PCID constants */
+
+/*
+ * This file includes the definitions for the KAISER feature.
+ * KAISER is a counter measure against x86_64 side channel attacks on
+ * the kernel virtual memory. It has a shadow pgd for every process: the
+ * shadow pgd has a minimalistic kernel-set mapped, but includes the whole
+ * user memory. Within a kernel context switch, or when an interrupt is handled,
+ * the pgd is switched to the normal one. When the system switches to user mode,
+ * the shadow pgd is enabled. By this, the virtual memory caches are freed,
+ * and the user may not attack the whole kernel memory.
+ *
+ * A minimalistic kernel mapping holds the parts needed to be mapped in user
+ * mode, such as the entry/exit functions of the user space, or the stacks.
+ */
+
+#define KAISER_SHADOW_PGD_OFFSET 0x1000
+
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+.macro _SWITCH_TO_KERNEL_CR3 reg
+movq %cr3, \reg
+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
+ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+movq \reg, %cr3
+.endm
+
+.macro _SWITCH_TO_USER_CR3 reg regb
+/*
+ * regb must be the low byte portion of reg: because we have arranged
+ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
+ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
+ * not enabled): so that the one register can update both memory and cr3.
+ */
+movq %cr3, \reg
+orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+js 9f
+/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+9:
+movq \reg, %cr3
+.endm
+
+.macro SWITCH_KERNEL_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+popq %rax
+8:
+.endm
+
+.macro SWITCH_USER_CR3
+ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+_SWITCH_TO_USER_CR3 %rax %al
+popq %rax
+8:
+.endm
+
+.macro SWITCH_KERNEL_CR3_NO_STACK
+ALTERNATIVE "jmp 8f", \
+ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
+ X86_FEATURE_KAISER
+_SWITCH_TO_KERNEL_CR3 %rax
+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+8:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+.macro SWITCH_KERNEL_CR3
+.endm
+.macro SWITCH_USER_CR3
+.endm
+.macro SWITCH_KERNEL_CR3_NO_STACK
+.endm
+
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+ * stored. To change the address space, another register is
+ * needed. A register therefore has to be stored/restored.
+*/
+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+extern int kaiser_enabled;
+extern void __init kaiser_check_boottime_disable(void);
+#else
+#define kaiser_enabled 0
+static inline void __init kaiser_check_boottime_disable(void) {}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
+/**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: The mapping flags of the pages
+ *
+ * The mapping is done on a global scope, so no bigger
+ * synchronization has to be done. the pages have to be
+ * manually unmapped again when they are not needed any longer.
+ */
+extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+/**
+ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ */
+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+/**
+ * kaiser_init - Initialize the shadow mapping
+ *
+ * Most parts of the shadow mapping can be mapped upon boot
+ * time. Only per-process things like the thread stacks
+ * or a new LDT have to be mapped at runtime. These boot-
+ * time mappings are permanent and never unmapped.
+ */
+extern void kaiser_init(void);
+
+#endif /* __ASSEMBLY */
+
+#endif /* _ASM_X86_KAISER_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde807..20cfeeb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1113,7 +1113,8 @@
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+ return x86_emulate_instruction(vcpu, 0,
+ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
void kvm_enable_efer_bits(u64);
@@ -1397,4 +1398,7 @@
#endif
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 72198c6..8b272a0 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -33,12 +33,6 @@
#endif
} mm_context_t;
-#ifdef CONFIG_SMP
void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index f9dd224..d23e355 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -99,10 +99,8 @@
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
-#ifdef CONFIG_SMP
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
}
static inline int init_new_context(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b601dda..c768bc1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -37,6 +37,13 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+
+#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
@@ -50,6 +57,11 @@
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
+
+#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -330,6 +342,9 @@
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index b5fee97..ed35b91 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -188,8 +188,7 @@
* that some other imaginary CPU is updating continuously with a
* time stamp.
*/
- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
- "lfence", X86_FEATURE_LFENCE_RDTSC);
+ barrier_nospec();
return rdtsc();
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 0000000..300cc15
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_X86_NOSPEC_BRANCH_H_
+#define _ASM_X86_NOSPEC_BRANCH_H_
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.nospec
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+ jmp .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+ RETPOLINE_JMP \reg
+.Ldo_call_\@:
+ call .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+/* This clobbers the BX register */
+.macro FILL_RETURN_BUFFER nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "", "call __clear_rsb", \ftr
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.nospec\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC \
+ ANNOTATE_NOSPEC_ALTERNATIVE \
+ ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+ " jmp 904f;\n" \
+ " .align 16\n" \
+ "901: call 903f;\n" \
+ "902: pause;\n" \
+ " lfence;\n" \
+ " jmp 902b;\n" \
+ " .align 16\n" \
+ "903: addl $4, %%esp;\n" \
+ " pushl %[thunk_target];\n" \
+ " ret;\n" \
+ " .align 16\n" \
+ "904: call 901b;\n", \
+ X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+ SPECTRE_V2_NONE,
+ SPECTRE_V2_RETPOLINE_MINIMAL,
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+ SPECTRE_V2_RETPOLINE_GENERIC,
+ SPECTRE_V2_RETPOLINE_AMD,
+ SPECTRE_V2_IBRS,
+};
+
+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ALL *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+ alternative_input("",
+ "call __fill_rsb",
+ X86_FEATURE_RETPOLINE,
+ ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+#endif
+}
+
+static inline void indirect_branch_prediction_barrier(void)
+{
+ asm volatile(ALTERNATIVE("",
+ "movl %[msr], %%ecx\n\t"
+ "movl %[val], %%eax\n\t"
+ "movl $0, %%edx\n\t"
+ "wrmsr",
+ X86_FEATURE_USE_IBPB)
+ : : [msr] "i" (MSR_IA32_PRED_CMD),
+ [val] "i" (PRED_CMD_IBPB)
+ : "eax", "ecx", "edx", "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 437feb4..5af0401 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -18,6 +18,18 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+/*
+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#else
+#define kaiser_enabled 0
+#endif
+#define PGD_ALLOCATION_ORDER kaiser_enabled
+
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_checkwx(void);
@@ -690,7 +702,17 @@
static inline int pgd_bad(pgd_t pgd)
{
- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ pgdval_t ignore_flags = _PAGE_USER;
+ /*
+ * We set NX on KAISER pgds that map userspace memory so
+ * that userspace can not meaningfully use the kernel
+ * page table by accident; it will fault on the first
+ * instruction it tries to run. See native_set_pgd().
+ */
+ if (kaiser_enabled)
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@@ -903,7 +925,15 @@
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
- memcpy(dst, src, count * sizeof(pgd_t));
+ memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (kaiser_enabled) {
+ /* Clone the shadow pgd part as well */
+ memcpy(native_get_shadow_pgd(dst),
+ native_get_shadow_pgd(src),
+ count * sizeof(pgd_t));
+ }
+#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1cc82ec..ce97c8c6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,9 +106,32 @@
native_set_pud(pud, native_make_pud(0));
}
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+#ifdef CONFIG_DEBUG_VM
+ /* linux/mmdebug.h may not have been included at this point */
+ BUG_ON(!kaiser_enabled);
+#endif
+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+}
+#else
+static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ return pgd;
+}
+static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+{
+ BUILD_BUG_ON(1);
+ return NULL;
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pgd;
+ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8b4de22..f1c8ac4 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -119,7 +119,7 @@
#define _PAGE_DEVMAP (_AT(pteval_t, 0))
#endif
-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
@@ -137,6 +137,33 @@
_PAGE_SOFT_DIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+/* The ASID is the lower 12 bits of CR3 */
+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
+
+/* Mask for all the PCID-related bits in CR3: */
+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+#else
+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
+/*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+ * set in CR3:
+ */
+#define X86_CR3_PCID_KERN_FLUSH (0)
+#define X86_CR3_PCID_USER_FLUSH (0)
+#define X86_CR3_PCID_KERN_NOFLUSH (0)
+#define X86_CR3_PCID_USER_NOFLUSH (0)
+#endif
+
/*
* The cache modes defined here are used to translate between pure SW usage
* and the HW defined cache mode bits and/or PAT entries.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 83db0ea..cb866ae 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -156,8 +156,8 @@
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -308,7 +308,7 @@
} ____cacheline_aligned;
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
@@ -391,8 +391,6 @@
unsigned short gsindex;
#endif
- u32 status; /* thread synchronous flags */
-
#ifdef CONFIG_X86_64
unsigned long fsbase;
unsigned long gsbase;
@@ -596,7 +594,7 @@
{
int tmp;
-#ifdef CONFIG_M486
+#ifdef CONFIG_X86_32
/*
* Do a CPUID if available, otherwise do a jump. The jump
* can conveniently enough be the jump around CPUID.
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fac9a5c..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -100,6 +100,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index e3c95e8..03eedc2 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -116,7 +116,7 @@
unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
@@ -177,7 +177,7 @@
const unsigned long *args)
{
# ifdef CONFIG_IA32_EMULATION
- if (task->thread.status & TS_COMPAT)
+ if (task->thread_info.status & TS_COMPAT)
switch (i) {
case 0:
if (!n--) break;
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcaf..bad25bb 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@
asmlinkage long sys_iopl(unsigned int);
/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb0..89978b9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -54,6 +54,7 @@
struct thread_info {
unsigned long flags; /* low level flags */
+ u32 status; /* thread synchronous flags */
};
#define INIT_THREAD_INFO(tsk) \
@@ -152,17 +153,6 @@
*/
#ifndef __ASSEMBLY__
-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
/*
* Walks up the stack frames to make sure that the specified object is
* entirely contained by a single stack frame.
@@ -224,7 +214,7 @@
#define in_ia32_syscall() true
#else
#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
- current->thread.status & TS_COMPAT)
+ current_thread_info()->status & TS_COMPAT)
#endif
/*
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc5abff..94146f6 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,6 +7,7 @@
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
+#include <asm/smp.h>
static inline void __invpcid(unsigned long pcid, unsigned long addr,
unsigned long type)
@@ -65,10 +66,8 @@
#endif
struct tlb_state {
-#ifdef CONFIG_SMP
struct mm_struct *active_mm;
int state;
-#endif
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -133,6 +132,24 @@
cr4_set_bits(mask);
}
+/*
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern int kaiser_enabled;
+extern void kaiser_setup_pcid(void);
+extern void kaiser_flush_tlb_on_return_to_user(void);
+#else
+#define kaiser_enabled 0
+static inline void kaiser_setup_pcid(void)
+{
+}
+static inline void kaiser_flush_tlb_on_return_to_user(void)
+{
+}
+#endif
+
static inline void __native_flush_tlb(void)
{
/*
@@ -141,6 +158,8 @@
* back:
*/
preempt_disable();
+ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
native_write_cr3(native_read_cr3());
preempt_enable();
}
@@ -150,20 +169,27 @@
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ if (cr4 & X86_CR4_PGE) {
+ /* clear PGE and flush TLB of all entries */
+ native_write_cr4(cr4 & ~X86_CR4_PGE);
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+ /* do it with cr3, letting kaiser flush user PCID */
+ __native_flush_tlb();
+ }
}
static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
- if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@@ -175,23 +201,52 @@
* be called from deep inside debugging code.)
*/
raw_local_irq_save(flags);
-
__native_flush_tlb_global_irq_disabled();
-
raw_local_irq_restore(flags);
}
static inline void __native_flush_tlb_single(unsigned long addr)
{
- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ /*
+ * SIMICS #GP's if you run INVPCID with type 2/3
+ * and X86_CR4_PCIDE clear. Shame!
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+ * invlpg in the case we are called early.
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+ /* Flush the address out of both PCIDs. */
+ /*
+ * An optimization here might be to determine addresses
+ * that are only kernel-mapped and only flush the kernel
+ * ASID. But, userspace flushes are probably much more
+ * important performance-wise.
+ *
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+ if (kaiser_enabled)
+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
}
static inline void __flush_tlb_all(void)
{
- if (boot_cpu_has(X86_FEATURE_PGE))
- __flush_tlb_global();
- else
- __flush_tlb();
+ __flush_tlb_global();
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}
static inline void __flush_tlb_one(unsigned long addr)
@@ -205,7 +260,6 @@
/*
* TLB flushing:
*
- * - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
@@ -217,84 +271,6 @@
* and page-granular flushes are available only on i486 and up.
*/
-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions. *Not* intended to be called
- * directly. All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb_all();
-}
-
-static inline void flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
-static inline void local_flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
- unsigned long start, unsigned long end, unsigned long vmflag)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
-
#define local_flush_tlb() __flush_tlb()
#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
@@ -303,13 +279,14 @@
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb() flush_tlb_current_task()
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+{
+ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+}
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
@@ -324,8 +301,6 @@
this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
}
-#endif /* SMP */
-
#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, mm, start, end) \
native_flush_tlb_others(mask, mm, start, end)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7..688315b 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -92,6 +92,7 @@
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0bd651e..0c87840 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -123,6 +123,11 @@
#define __uaccess_begin() stac()
#define __uaccess_end() clac()
+#define __uaccess_begin_nospec() \
+({ \
+ stac(); \
+ barrier_nospec(); \
+})
/*
* This is a type: either unsigned long, if the argument fits into
@@ -432,7 +437,7 @@
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
@@ -474,6 +479,10 @@
__uaccess_begin(); \
barrier();
+#define uaccess_try_nospec do { \
+ current->thread.uaccess_err = 0; \
+ __uaccess_begin_nospec(); \
+
#define uaccess_catch(err) \
__uaccess_end(); \
(err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
@@ -538,7 +547,7 @@
* get_user_ex(...);
* } get_user_catch(err)
*/
-#define get_user_try uaccess_try
+#define get_user_try uaccess_try_nospec
#define get_user_catch(err) uaccess_catch(err)
#define get_user_ex(x, ptr) do { \
@@ -573,7 +582,7 @@
__typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
- __uaccess_begin(); \
+ __uaccess_begin_nospec(); \
switch (size) { \
case 1: \
{ \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7d3bdd1..d6d2450 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -102,17 +102,17 @@
switch (n) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
__uaccess_end();
return ret;
@@ -130,17 +130,17 @@
switch (n) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
__uaccess_end();
return ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 673059a..6e5cc08 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -59,31 +59,31 @@
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
case 1:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u8 *)dst, (u8 __user *)src,
ret, "b", "b", "=q", 1);
__uaccess_end();
return ret;
case 2:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u16 *)dst, (u16 __user *)src,
ret, "w", "w", "=r", 2);
__uaccess_end();
return ret;
case 4:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u32 *)dst, (u32 __user *)src,
ret, "l", "k", "=r", 4);
__uaccess_end();
return ret;
case 8:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 8);
__uaccess_end();
return ret;
case 10:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 10);
if (likely(!ret))
@@ -93,7 +93,7 @@
__uaccess_end();
return ret;
case 16:
- __uaccess_begin();
+ __uaccess_begin_nospec();
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
ret, "q", "", "=r", 16);
if (likely(!ret))
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 6ba66ee..62210da 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -12,12 +12,15 @@
* Returns true if handled.
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool vsyscall_enabled(void);
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
return false;
}
+static inline bool vsyscall_enabled(void) { return false; }
#endif
+extern unsigned long vsyscall_pgprot;
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index dc8a00a..1fdcb8c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -215,9 +216,9 @@
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(CALL_NOSPEC
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 567de50..6768d13 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -77,7 +77,8 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b89bef9..0a1e8a6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -335,13 +335,12 @@
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi);
+
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
/*
* Check bus_irq boundary.
*/
@@ -351,14 +350,6 @@
}
/*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
@@ -366,16 +357,8 @@
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
+ if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+ return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
@@ -422,6 +405,34 @@
return 0;
}
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+ u8 trigger, u32 gsi)
+{
+ struct mpc_intsrc mp_irq;
+ int ioapic, pin;
+
+ /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0) {
+ pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+ return ioapic;
+ }
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq;
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = pin;
+
+ mp_save_irq(&mp_irq);
+
+ return 0;
+}
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -466,7 +477,11 @@
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ if (bus_irq < NR_IRQS_LEGACY)
+ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+ else
+ mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
@@ -720,7 +735,7 @@
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cb272a..03b6e5c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -46,17 +46,6 @@
}
__setup("noreplace-smp", setup_noreplace_smp);
-#ifdef CONFIG_PARAVIRT
-static int __initdata_or_module noreplace_paravirt = 0;
-
-static int __init setup_noreplace_paravirt(char *str)
-{
- noreplace_paravirt = 1;
- return 1;
-}
-__setup("noreplace-paravirt", setup_noreplace_paravirt);
-#endif
-
#define DPRINTK(fmt, args...) \
do { \
if (debug_alternative) \
@@ -340,9 +329,12 @@
static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
+ int i;
- if (instr[0] != 0x90)
- return;
+ for (i = 0; i < a->padlen; i++) {
+ if (instr[i] != 0x90)
+ return;
+ }
local_irq_save(flags);
add_nops(instr + (a->instrlen - a->padlen), a->padlen);
@@ -585,9 +577,6 @@
struct paravirt_patch_site *p;
char insnbuf[MAX_PATCH_LEN];
- if (noreplace_paravirt)
- return;
-
for (p = start; p < end; p++) {
unsigned int used;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1..b5229ab 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -361,14 +361,17 @@
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
err = assign_irq_vector_policy(virq + i, node, data, info);
- if (err)
+ if (err) {
+ irq_data->chip_data = NULL;
+ free_apic_chip_data(data);
goto error;
+ }
}
return 0;
error:
- x86_vector_free_irqs(domain, virq, i + 1);
+ x86_vector_free_irqs(domain, virq, i);
return err;
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4a8697f..33b6367 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -20,13 +20,11 @@
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-y += bugs.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += bugs.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2b4cf04..1b89f0c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -782,8 +782,32 @@
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ unsigned long long val;
+ int ret;
+
+ /*
+ * A serializing LFENCE has less overhead than MFENCE, so
+ * use it for execution serialization. On families which
+ * don't have that MSR, LFENCE is already serializing.
+ * msr_set_bit() uses the safe accessors, too, even if the MSR
+ * is not present.
+ */
+ msr_set_bit(MSR_F10H_DECFG,
+ MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+ /*
+ * Verify that the MSR write was successful (could be running
+ * under a hypervisor) and only then assume that LFENCE is
+ * serializing.
+ */
+ ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+ if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+ } else {
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bd17db1..957ad44 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,6 +9,11 @@
*/
#include <linux/init.h>
#include <linux/utsname.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
@@ -16,15 +21,25 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
void __init check_bugs(void)
{
identify_boot_cpu();
-#ifndef CONFIG_SMP
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ /* Select the proper spectre mitigation before patching alternatives */
+ spectre_v2_select_mitigation();
+
+#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -40,4 +55,284 @@
alternative_instructions();
fpu__init_check_bugs();
+#else /* CONFIG_X86_64 */
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+#endif
}
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+ SPECTRE_V2_CMD_NONE,
+ SPECTRE_V2_CMD_AUTO,
+ SPECTRE_V2_CMD_FORCE,
+ SPECTRE_V2_CMD_RETPOLINE,
+ SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+ SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+ [SPECTRE_V2_NONE] = "Vulnerable",
+ [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
+ [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
+ [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+#ifdef RETPOLINE
+static bool spectre_v2_bad_module;
+
+bool retpoline_module_ok(bool has_retpoline)
+{
+ if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
+ return true;
+
+ pr_err("System may be vulnerable to spectre v2\n");
+ spectre_v2_bad_module = true;
+ return false;
+}
+
+static inline const char *spectre_v2_module_string(void)
+{
+ return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
+}
+#else
+static inline const char *spectre_v2_module_string(void) { return ""; }
+#endif
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ pr_info("%s selected on command line.\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+ return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+ int len = strlen(opt);
+
+ return len == arglen && !strncmp(arg, opt, len);
+}
+
+static const struct {
+ const char *option;
+ enum spectre_v2_mitigation_cmd cmd;
+ bool secure;
+} mitigation_options[] = {
+ { "off", SPECTRE_V2_CMD_NONE, false },
+ { "on", SPECTRE_V2_CMD_FORCE, true },
+ { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
+ { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
+ { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+ { "auto", SPECTRE_V2_CMD_AUTO, false },
+};
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+ char arg[20];
+ int ret, i;
+ enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ return SPECTRE_V2_CMD_NONE;
+ else {
+ ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+ sizeof(arg));
+ if (ret < 0)
+ return SPECTRE_V2_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+ if (!match_option(arg, ret, mitigation_options[i].option))
+ continue;
+ cmd = mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+ }
+
+ if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
+ cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
+ !IS_ENABLED(CONFIG_RETPOLINE)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (mitigation_options[i].secure)
+ spec2_print_if_secure(mitigation_options[i].option);
+ else
+ spec2_print_if_insecure(mitigation_options[i].option);
+
+ return cmd;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+ enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+ enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+ /*
+ * If the CPU is not affected and the command line mode is NONE or AUTO
+ * then nothing to do.
+ */
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+ (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+ return;
+
+ switch (cmd) {
+ case SPECTRE_V2_CMD_NONE:
+ return;
+
+ case SPECTRE_V2_CMD_FORCE:
+ case SPECTRE_V2_CMD_AUTO:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_AMD:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_amd;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_generic;
+ break;
+ case SPECTRE_V2_CMD_RETPOLINE:
+ if (IS_ENABLED(CONFIG_RETPOLINE))
+ goto retpoline_auto;
+ break;
+ }
+ pr_err("kernel not compiled with retpoline; no mitigation available!");
+ return;
+
+retpoline_auto:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ retpoline_amd:
+ if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+ pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+ goto retpoline_generic;
+ }
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+ SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ } else {
+ retpoline_generic:
+ mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+ SPECTRE_V2_RETPOLINE_MINIMAL;
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ }
+
+ spectre_v2_enabled = mode;
+ pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP or KPTI are available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_KAISER) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ }
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ return sprintf(buf, "Not affected\n");
+ if (boot_cpu_has(X86_FEATURE_KAISER))
+ return sprintf(buf, "Mitigation: PTI\n");
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+ return sprintf(buf, "Not affected\n");
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ spectre_v2_module_string());
+}
+#endif
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
deleted file mode 100644
index a972ac4..0000000
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-#include <asm/cacheflush.h>
-
-void __init check_bugs(void)
-{
- identify_boot_cpu();
-#if !defined(CONFIG_SMP)
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4eece91..08e89ed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -44,6 +44,8 @@
#include <asm/pat.h>
#include <asm/microcode.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -93,7 +95,7 @@
static const struct cpu_dev *this_cpu = &default_cpu;
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
/*
* We need valid kernel segments for data and code in long mode too
@@ -163,6 +165,24 @@
}
__setup("nompx", x86_mpx_setup);
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+ /* require an exact match without trailing characters */
+ if (strlen(s))
+ return 0;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return 1;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ pr_info("nopcid: PCID feature disabled\n");
+ return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
static int __init x86_noinvpcid_setup(char *s)
{
/* noinvpcid doesn't accept parameters */
@@ -306,6 +326,39 @@
}
}
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ cr4_set_bits(X86_CR4_PCIDE);
+ /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+ *
+ * 1/2 take a PCID, but 3/4 do not. So, 3/4
+ * ignore the PCID argument in the descriptor.
+ * But, we have to be careful not to call 1/2
+ * with an actual non-zero PCID in them before
+ * we do the above cr4_set_bits().
+ */
+ if (cpu_has(c, X86_FEATURE_INVPCID))
+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+ kaiser_setup_pcid();
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -429,8 +482,8 @@
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -655,6 +708,36 @@
}
}
+static void apply_forced_caps(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+}
+
+static void init_speculation_control(struct cpuinfo_x86 *c)
+{
+ /*
+ * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+ * and they also have a different bit for STIBP support. Also,
+ * a hypervisor might have set the individual AMD bits even on
+ * Intel CPUs, for finer-grained selection of what's available.
+ *
+ * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
+ * features, which are visible in /proc/cpuinfo and used by the
+ * kernel. So set those accordingly from the Intel bits.
+ */
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+ }
+ if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -676,6 +759,7 @@
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
@@ -748,6 +832,7 @@
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
+ init_speculation_control(c);
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -776,6 +861,41 @@
#endif
}
+static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
+ { X86_VENDOR_CENTAUR, 5 },
+ { X86_VENDOR_INTEL, 5 },
+ { X86_VENDOR_NSC, 5 },
+ { X86_VENDOR_ANY, 4 },
+ {}
+};
+
+static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
+ { X86_VENDOR_AMD },
+ {}
+};
+
+static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = 0;
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return false;
+
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ /* Rogue Data Cache Load? No! */
+ if (ia32_cap & ARCH_CAP_RDCL_NO)
+ return false;
+
+ return true;
+}
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -821,7 +941,23 @@
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ if (!x86_match_cpu(cpu_no_speculation)) {
+ if (cpu_vulnerable_to_meltdown(c))
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ }
+
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
@@ -1035,10 +1171,7 @@
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
@@ -1064,6 +1197,9 @@
setup_smep(c);
setup_smap(c);
+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -1097,10 +1233,7 @@
* Clear/Set all flags overridden by options, need do it
* before following smp all cpus cap AND.
*/
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -1325,7 +1458,7 @@
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
/* May not be marked __init: used by software suspend */
@@ -1483,6 +1616,14 @@
* try to read it.
*/
cr4_init_shadow();
+ if (!kaiser_enabled) {
+ /*
+ * secondary_startup_64() deferred setting PGE in cr4:
+ * probe_page_size_mask() sets it on the boot cpu,
+ * but it needs to be set on each secondary cpu.
+ */
+ cr4_set_bits(X86_CR4_PGE);
+ }
/*
* Load microcode on this cpu if a valid microcode is available.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fcd484d..4097b43 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -61,6 +61,59 @@
}
}
+/*
+ * Early microcode releases for the Spectre v2 mitigation were broken.
+ * Information taken from;
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://kb.vmware.com/s/article/52345
+ * - Microcode revisions observed in the wild
+ * - Release note from 20180108 microcode release
+ */
+struct sku_microcode {
+ u8 model;
+ u8 stepping;
+ u32 microcode;
+};
+static const struct sku_microcode spectre_bad_microcodes[] = {
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
+ { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
+ { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
+ { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
+ { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
+ { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
+ { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
+ { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
+ { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
+ { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
+ { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
+ { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
+ { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
+ { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
+ { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
+ { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
+ /* Updated in the 20180108 release; blacklist until we know otherwise */
+ { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
+ /* Observed in the wild */
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
+ { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
+};
+
+static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+ if (c->x86_model == spectre_bad_microcodes[i].model &&
+ c->x86_mask == spectre_bad_microcodes[i].stepping)
+ return (c->microcode <= spectre_bad_microcodes[i].microcode);
+ }
+ return false;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -87,6 +140,19 @@
rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
}
+ /* Now if any of them are set, check the blacklist and clear the lot */
+ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
+ cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
+ cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+ cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+ pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+ setup_clear_cpu_cap(X86_FEATURE_IBRS);
+ setup_clear_cpu_cap(X86_FEATURE_IBPB);
+ setup_clear_cpu_cap(X86_FEATURE_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ }
+
/*
* Atom erratum AAE44/AAF40/AAG38/AAH41:
*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index de6626c..be63371 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -934,6 +934,8 @@
ci_leaf_init(this_leaf++, &id4_regs);
__cache_cpumap_setup(cpu, idx, &id4_regs);
}
+ this_cpu_ci->cpu_map_populated = true;
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ca5f8a..fe5cd6e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1754,6 +1754,11 @@
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 017bda1..b74bb29 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -592,6 +592,7 @@
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -603,6 +604,9 @@
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5ce5155..0afaf00 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,7 +43,7 @@
#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-static bool dis_ucode_ldr;
+static bool dis_ucode_ldr = true;
/*
* Synchronization.
@@ -73,6 +73,7 @@
static bool __init check_loader_disabled_bsp(void)
{
static const char *__dis_opt_str = "dis_ucode_ldr";
+ u32 a, b, c, d;
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -85,8 +86,20 @@
bool *res = &dis_ucode_ldr;
#endif
- if (cmdline_find_option_bool(cmdline, option))
- *res = true;
+ a = 1;
+ c = 0;
+ native_cpuid(&a, &b, &c, &d);
+
+ /*
+ * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
+ * completely accurate as xen pv guests don't see that CPUID bit set but
+ * that's good enough as they don't land on the BSP path anyway.
+ */
+ if (c & BIT(31))
+ return *res;
+
+ if (cmdline_find_option_bool(cmdline, option) <= 0)
+ *res = false;
return *res;
}
@@ -114,9 +127,7 @@
{
int vendor;
unsigned int family;
-
- if (check_loader_disabled_bsp())
- return;
+ bool intel = true;
if (!have_cpuid_p())
return;
@@ -126,16 +137,27 @@
switch (vendor) {
case X86_VENDOR_INTEL:
- if (family >= 6)
- load_ucode_intel_bsp();
+ if (family < 6)
+ return;
break;
+
case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_bsp(family);
+ if (family < 0x10)
+ return;
+ intel = false;
break;
+
default:
- break;
+ return;
}
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (intel)
+ load_ucode_intel_bsp();
+ else
+ load_ucode_amd_bsp(family);
}
static bool check_loader_disabled_ap(void)
@@ -154,9 +176,6 @@
if (check_loader_disabled_ap())
return;
- if (!have_cpuid_p())
- return;
-
vendor = x86_cpuid_vendor();
family = x86_cpuid_family();
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 13dbcc0..f90f176 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -40,6 +40,9 @@
#include <asm/setup.h>
#include <asm/msr.h>
+/* last level cache size per core */
+static int llc_size_per_core;
+
/*
* Temporary microcode blobs pointers storage. We note here during early load
* the pointers to microcode blobs we've got from whatever storage (detached
@@ -1051,8 +1054,19 @@
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
- pr_err_once("late loading on model 79 is disabled.\n");
+ /*
+ * Late loading on model 79 with microcode revision less than 0x0b000021
+ * and LLC size per core bigger than 2.5MB may result in a system hang.
+ * This behavior is documented in item BDF90, #334165 (Intel Xeon
+ * Processor E7-8800/4800 v4 Product Family).
+ */
+ if (c->x86 == 6 &&
+ c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ c->x86_mask == 0x01 &&
+ llc_size_per_core > 2621440 &&
+ c->microcode < 0x0b000021) {
+ pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
return true;
}
@@ -1116,6 +1130,15 @@
.microcode_fini_cpu = microcode_fini_cpu,
};
+static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
+{
+ u64 llc_size = c->x86_cache_size * 1024;
+
+ do_div(llc_size, c->x86_max_cores);
+
+ return (int)llc_size;
+}
+
struct microcode_ops * __init init_intel_microcode(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -1126,6 +1149,8 @@
return NULL;
}
+ llc_size_per_core = calc_llc_size_per_core(c);
+
return µcode_intel_ops;
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 1db8dc4..afbb525 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,9 +31,6 @@
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 04f89ca..e33b385 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -41,6 +41,7 @@
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/espfix.h>
+#include <asm/kaiser.h>
/*
* Note: we only need 6*8 = 48 bytes for the espfix stack, but round
@@ -126,6 +127,15 @@
/* Install the espfix pud into the kernel page directory */
pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+ /*
+ * Just copy the top-level PGD that is mapping the espfix
+ * area to ensure it is mapped into the shadow user page
+ * tables.
+ */
+ if (kaiser_enabled) {
+ set_pgd(native_get_shadow_pgd(pgd_p),
+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+ }
/* Randomize the locations */
init_espfix_random();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 095ef7d..abfbb61b 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1077,6 +1077,7 @@
* Add back in the features that came in from userspace:
*/
xsave->header.xfeatures |= xfeatures;
+ xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures;
return 0;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b4421cc..67cd7c1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -190,8 +190,8 @@
movq $(init_level4_pgt - __START_KERNEL_map), %rax
1:
- /* Enable PAE mode and PGE */
- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
+ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
movq %rcx, %cr4
/* Setup early boot stage 4 level pagetables. */
@@ -405,6 +405,27 @@
.balign PAGE_SIZE; \
GLOBAL(name)
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define KAISER_USER_PGD_FILL 512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define KAISER_USER_PGD_FILL 0
+#endif
+
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@@ -414,9 +435,10 @@
.endr
__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PGD_PAGE(early_level4_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -424,16 +446,18 @@
.data
#ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
.fill 512,8,0
+ .fill KAISER_USER_PGD_FILL,8,0
#else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
@@ -444,6 +468,7 @@
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif
+ .fill KAISER_USER_PGD_FILL,8,0
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 932348fb..9512529 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -354,7 +354,7 @@
irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
- disable_irq(hdev->irq);
+ disable_hardirq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a..2763573 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -54,17 +55,17 @@
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
static inline void *current_stack(void)
{
- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,17 +89,17 @@
/* Save the next esp at the bottom of the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ CALL_NOSPEC
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
@@ -139,7 +140,7 @@
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1423ab1..f480b38 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -51,7 +51,7 @@
.flags = IRQF_NO_THREAD,
};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 5f8f0b3..2c0b0b6 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -26,7 +26,7 @@
#include "common.h"
static nokprobe_inline
-int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
@@ -41,20 +41,21 @@
__this_cpu_write(current_kprobe, NULL);
if (orig_ip)
regs->ip = orig_ip;
- return 1;
}
int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb, 0);
- else
- return 0;
+ if (kprobe_ftrace(p)) {
+ __skip_singlestep(p, regs, kcb, 0);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
}
NOKPROBE_SYMBOL(skip_singlestep);
-/* Ftrace callback handler for kprobes */
+/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
@@ -77,13 +78,17 @@
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
+ /* To emulate trap based kprobes, preempt_disable here */
+ preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
__skip_singlestep(p, regs, kcb, orig_ip);
+ preempt_enable_no_resched();
+ }
/*
* If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
+ * resets current kprobe, and keep preempt count +1.
*/
}
end:
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4d74f73..dc20da1c 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,6 +37,7 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "common.h"
@@ -192,7 +193,7 @@
}
/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -226,6 +227,26 @@
return (start <= target && target <= start + len);
}
+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6707039..8bc68cf 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,9 +12,11 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <linux/kaiser.h>
#include <asm/ldt.h>
#include <asm/desc.h>
@@ -33,11 +35,21 @@
set_ldt(pc->ldt->entries, pc->ldt->size);
}
+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(ldt->entries);
+ else
+ free_page((unsigned long)ldt->entries);
+ kfree(ldt);
+}
+
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(int size)
{
struct ldt_struct *new_ldt;
int alloc_size;
+ int ret;
if (size > LDT_ENTRIES)
return NULL;
@@ -65,7 +77,13 @@
return NULL;
}
+ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
+ __PAGE_KERNEL);
new_ldt->size = size;
+ if (ret) {
+ __free_ldt_struct(new_ldt);
+ return NULL;
+ }
return new_ldt;
}
@@ -91,12 +109,10 @@
if (likely(!ldt))
return;
+ kaiser_remove_mapping((unsigned long)ldt->entries,
+ ldt->size * LDT_ENTRY_SIZE);
paravirt_free_ldt(ldt->entries, ldt->size);
- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
- vfree(ldt->entries);
- else
- free_page((unsigned long)ldt->entries);
- kfree(ldt);
+ __free_ldt_struct(ldt);
}
/*
@@ -271,8 +287,8 @@
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
int ret = -ENOSYS;
@@ -290,5 +306,14 @@
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but tht ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 7b0d3da..287ec3b 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -8,7 +8,7 @@
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -290,8 +290,9 @@
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
+ movq ftrace_trace_function, %r8
+ CALL_NOSPEC %r8
restore_mcount_regs
jmp fgraph_trace
@@ -334,5 +335,5 @@
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ JMP_NOSPEC %rdi
#endif
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index bb3840c..ee43b36 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -9,7 +9,6 @@
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
@@ -59,7 +58,6 @@
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_cpu_ops, clts);
- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index fc7cf64..54b2711 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,7 +41,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0887d2a..dffe81d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -538,7 +538,7 @@
current->personality &= ~READ_IMPLIES_EXEC;
/* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
- current->thread.status &= ~TS_COMPAT;
+ current_thread_info()->status &= ~TS_COMPAT;
} else {
set_thread_flag(TIF_IA32);
clear_thread_flag(TIF_X32);
@@ -546,7 +546,7 @@
current->mm->context.ia32_compat = TIF_IA32;
current->personality |= force_personality32;
/* Prepare the first "return" to user space */
- current->thread.status |= TS_COMPAT;
+ current_thread_info()->status |= TS_COMPAT;
}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0e63c02..e497d37 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -934,7 +934,7 @@
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- child->thread.status |= TS_I386_REGS_POKED;
+ child->thread_info.status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 067f981..ce020a6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -106,6 +106,10 @@
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index feaab07..6b55012 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,6 +114,7 @@
#include <asm/microcode.h>
#include <asm/mmu_context.h>
#include <asm/kaslr.h>
+#include <asm/kaiser.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -1019,6 +1020,12 @@
*/
init_hypervisor_platform();
+ /*
+ * This needs to happen right after XENPV is set on xen and
+ * kaiser_enabled is checked below in cleanup_highmap().
+ */
+ kaiser_check_boottime_disable();
+
x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 763af1d..b1a5d25 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -785,7 +785,7 @@
* than the tracee.
*/
#ifdef CONFIG_IA32_EMULATION
- if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9fe7b9e..e803d72 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -115,14 +115,10 @@
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
spin_unlock_irqrestore(&rtc_lock, flags);
- local_flush_tlb();
- pr_debug("1.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
- pr_debug("2.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
- pr_debug("3.\n");
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -130,11 +126,6 @@
unsigned long flags;
/*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 8402907..21454e2 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -134,6 +134,16 @@
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
pte_unmap(pte);
+
+ /*
+ * PTI poisons low addresses in the kernel page tables in the
+ * name of making them unusable for userspace. To execute
+ * code at such a low address, the poison must be cleared.
+ *
+ * Note: 'pgd' actually gets set in pud_alloc().
+ */
+ pgd->pgd &= ~_PAGE_NX;
+
return 0;
}
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 1c113db..2bb5ee4 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -9,10 +9,12 @@
#include <linux/atomic.h>
atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+__aligned(PAGE_SIZE)
struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
(unsigned long) trace_idt_table };
/* No need to be aligned, but done to keep all IDTs defined the same way. */
+__aligned(PAGE_SIZE)
gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
static int trace_irq_vector_refcount;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bd4e3d4..322f433 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -153,7 +153,7 @@
* from double_fault.
*/
BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ current_stack_pointer) >= THREAD_SIZE);
preempt_enable_no_resched();
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 44bf5cf..d07a9390 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -693,7 +693,6 @@
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 01f30e5..4b30128 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -191,7 +191,7 @@
pte_unmap_unlock(pte, ptl);
out:
up_write(&mm->mmap_sem);
- flush_tlb();
+ flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index dbf67f6..c7194e9 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,13 @@
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 91af75e..93f924d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -355,6 +355,10 @@
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_8000_0008_ebx_x86_features =
+ F(IBPB) | F(IBRS);
+
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
@@ -376,6 +380,10 @@
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ F(SPEC_CTRL) | F(ARCH_CAPABILITIES);
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -458,12 +466,14 @@
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ cpuid_mask(&entry->edx, CPUID_7_EDX);
} else {
entry->ebx = 0;
entry->ecx = 0;
+ entry->edx = 0;
}
entry->eax = 0;
- entry->edx = 0;
break;
}
case 9:
@@ -607,7 +617,14 @@
if (!g_phys_as)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
- entry->ebx = entry->edx = 0;
+ entry->edx = 0;
+ /* IBRS and IBPB aren't necessarily present in hardware cpuid */
+ if (boot_cpu_has(X86_FEATURE_IBPB))
+ entry->ebx |= F(IBPB);
+ if (boot_cpu_has(X86_FEATURE_IBRS))
+ entry->ebx |= F(IBRS);
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9368fec..d1beb71 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -160,6 +160,37 @@
return best && (best->edx & bit(X86_FEATURE_RDTSCP));
}
+static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && (best->ebx & bit(X86_FEATURE_IBPB)))
+ return true;
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+}
+
+static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && (best->ebx & bit(X86_FEATURE_IBRS)))
+ return true;
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+}
+
+static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
+}
+
+
/*
* NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
*/
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9606a9d..c8d5738 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -25,6 +25,7 @@
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
+#include <asm/nospec-branch.h>
#include "x86.h"
#include "tss.h"
@@ -1012,8 +1013,8 @@
void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; call *%[fastop]"
- : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+ asm("push %[flags]; popf; " CALL_NOSPEC
+ : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
return rc;
}
@@ -2395,9 +2396,21 @@
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr4)
+ u64 cr0, u64 cr3, u64 cr4)
{
int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2416,6 +2429,12 @@
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
}
return X86EMUL_CONTINUE;
@@ -2426,11 +2445,11 @@
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
- u32 val, cr0, cr4;
+ u32 val, cr0, cr3, cr4;
int i;
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
@@ -2472,14 +2491,14 @@
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr4;
+ u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;
@@ -2496,7 +2515,7 @@
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2524,7 +2543,7 @@
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
@@ -4972,6 +4991,8 @@
bool op_prefix = false;
bool has_seg_override = false;
struct opcode opcode;
+ u16 dummy;
+ struct desc_struct desc;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
@@ -4990,6 +5011,11 @@
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
+ def_op_bytes = def_ad_bytes = 2;
+ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+ if (desc.d)
+ def_op_bytes = def_ad_bytes = 4;
+ break;
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
@@ -5286,9 +5312,9 @@
if (!(ctxt->d & ByteOp))
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+ asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
+ [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5..5f810bb 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -257,8 +257,7 @@
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
+ kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
}
@@ -279,6 +278,7 @@
{
unsigned index;
bool mask_before, mask_after;
+ int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
@@ -301,14 +301,28 @@
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
+ /* Preserve read-only fields */
+ old_remote_irr = e->fields.remote_irr;
+ old_delivery_status = e->fields.delivery_status;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
- e->fields.remote_irr = 0;
}
+ e->fields.remote_irr = old_remote_irr;
+ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+ * be cleared by IOAPIC hardware when the entry is configured
+ * as edge-triggered. This behavior is used to simulate an
+ * explicit EOI on IOAPICs that don't have the EOI register.
+ */
+ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
+ e->fields.remote_irr = 0;
+
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3f05c04..b24b3c6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -246,9 +246,14 @@
recalculate_apic_map(apic->vcpu->kvm);
}
+static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+{
+ return ((id >> 4) << 16) | (1 << (id & 0xf));
+}
+
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
- u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+ u32 ldr = kvm_apic_calc_x2apic_ldr(id);
kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
@@ -2029,6 +2034,7 @@
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);
+ u32 *ldr = (u32 *)(s->regs + APIC_LDR);
if (vcpu->kvm->arch.x2apic_format) {
if (*id != vcpu->vcpu_id)
@@ -2039,6 +2045,10 @@
else
*id <<= 24;
}
+
+ /* In x2APIC mode, the LDR is fixed and based on the id */
+ if (set)
+ *ldr = kvm_apic_calc_x2apic_ldr(*id);
}
return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d29c745..0a324e1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5052,13 +5052,13 @@
{
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
- 0, 0, NULL);
+ 0, SLAB_ACCOUNT, NULL);
if (!pte_list_desc_cache)
goto nomem;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
sizeof(struct kvm_mmu_page),
- 0, 0, NULL);
+ 0, SLAB_ACCOUNT, NULL);
if (!mmu_page_header_cache)
goto nomem;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4fbf0c9..be644af 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -44,6 +44,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -182,6 +183,8 @@
u64 gs_base;
} host;
+ u64 spec_ctrl;
+
u32 *msrpm;
ulong nmi_iret_rip;
@@ -247,6 +250,8 @@
{ .index = MSR_CSTAR, .always = true },
{ .index = MSR_SYSCALL_MASK, .always = true },
#endif
+ { .index = MSR_IA32_SPEC_CTRL, .always = false },
+ { .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -509,6 +514,7 @@
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ struct vmcb *current_vmcb;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -860,6 +866,25 @@
return false;
}
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+{
+ u8 bit_write;
+ unsigned long tmp;
+ u32 offset;
+ u32 *msrpm;
+
+ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
+ to_svm(vcpu)->msrpm;
+
+ offset = svm_msrpm_offset(msr);
+ bit_write = 2 * (msr & 0x0f) + 1;
+ tmp = msrpm[offset];
+
+ BUG_ON(offset == MSR_INVALID);
+
+ return !!test_bit(bit_write, &tmp);
+}
+
static void set_msr_interception(u32 *msrpm, unsigned msr,
int read, int write)
{
@@ -1382,6 +1407,9 @@
unsigned long flags;
struct kvm_arch *vm_data = &kvm->arch;
+ if (!avic)
+ return;
+
avic_free_vm_id(vm_data->avic_vm_id);
if (vm_data->avic_logical_id_table_page)
@@ -1531,6 +1559,8 @@
u32 dummy;
u32 eax = 1;
+ svm->spec_ctrl = 0;
+
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
MSR_IA32_APICBASE_ENABLE;
@@ -1640,11 +1670,17 @@
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So do a full IBPB now.
+ */
+ indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1673,6 +1709,10 @@
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ indirect_branch_prediction_barrier();
+ }
avic_vcpu_load(vcpu, cpu);
}
@@ -2149,6 +2189,8 @@
int er;
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3502,6 +3544,13 @@
case MSR_VM_CR:
msr_info->data = svm->nested.vm_cr_msr;
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ msr_info->data = svm->spec_ctrl;
+ break;
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
@@ -3593,6 +3642,49 @@
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ svm->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_svm_vmrun_msrpm.
+ * We update the L1 MSR bit as well since it will end up
+ * touching the MSR anyway now.
+ */
+ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ if (is_guest_mode(vcpu))
+ break;
+ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -4820,6 +4912,15 @@
local_irq_enable();
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -4864,6 +4965,25 @@
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
+ "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+ "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+ "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+ "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+ "xor %%r8, %%r8 \n\t"
+ "xor %%r9, %%r9 \n\t"
+ "xor %%r10, %%r10 \n\t"
+ "xor %%r11, %%r11 \n\t"
+ "xor %%r12, %%r12 \n\t"
+ "xor %%r13, %%r13 \n\t"
+ "xor %%r14, %%r14 \n\t"
+ "xor %%r15, %%r15 \n\t"
+#endif
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
@@ -4893,6 +5013,30 @@
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
+ if (svm->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5dd5664..04c1e0f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
+#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -48,6 +49,7 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
#include "trace.h"
#include "pmu.h"
@@ -108,6 +110,14 @@
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+#define MSR_TYPE_RW 3
+
+#define MSR_BITMAP_MODE_X2APIC 1
+#define MSR_BITMAP_MODE_X2APIC_APICV 2
+#define MSR_BITMAP_MODE_LM 4
+
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
@@ -172,7 +182,6 @@
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
struct vmcs {
u32 revision_id;
@@ -190,6 +199,7 @@
struct vmcs *shadow_vmcs;
int cpu;
int launched;
+ unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -206,7 +216,7 @@
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
* More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
* underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration).
@@ -385,13 +395,6 @@
*/
#define VMCS12_SIZE 0x1000
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
- struct list_head list;
- gpa_t vmptr;
- struct loaded_vmcs vmcs02;
-};
-
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -418,15 +421,15 @@
*/
bool sync_shadow_vmcs;
- /* vmcs02_list cache of VMCSs recently used to run L2 guests */
- struct list_head vmcs02_pool;
- int vmcs02_num;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+
+ struct loaded_vmcs vmcs02;
+
/*
- * Guest pages referred to in vmcs02 with host-physical pointers, so
- * we must keep them pinned while L2 runs.
+ * Guest pages referred to in the vmcs02 with host-physical
+ * pointers, so we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
struct page *virtual_apic_page;
@@ -435,8 +438,6 @@
bool pi_pending;
u16 posted_intr_nv;
- unsigned long *msr_bitmap;
-
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -537,6 +538,7 @@
unsigned long host_rsp;
u8 fail;
bool nmi_known_unmasked;
+ u8 msr_bitmap_mode;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
@@ -548,6 +550,10 @@
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
+
+ u64 arch_capabilities;
+ u64 spec_ctrl;
+
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
/*
@@ -855,13 +861,18 @@
static inline short vmcs_field_to_offset(unsigned long field)
{
- BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+ const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
+ unsigned short offset;
- if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- vmcs_field_to_offset_table[field] == 0)
+ BUILD_BUG_ON(size > SHRT_MAX);
+ if (field >= size)
return -ENOENT;
- return vmcs_field_to_offset_table[field];
+ field = array_index_nospec(field, size);
+ offset = vmcs_field_to_offset_table[field];
+ if (offset == 0)
+ return -ENOENT;
+ return offset;
}
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
@@ -903,6 +914,9 @@
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -922,12 +936,6 @@
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -1199,6 +1207,11 @@
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
+static inline bool cpu_has_vmx_invvpid(void)
+{
+ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
static inline bool cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1839,6 +1852,52 @@
vmcs_write32(EXCEPTION_BITMAP, eb);
}
+/*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
+/*
+ * Check if MSR is intercepted for L01 MSR bitmap.
+ */
+static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+{
+ unsigned long *msr_bitmap;
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return true;
+
+ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+
+ if (msr <= 0x1fff) {
+ return !!test_bit(msr, msr_bitmap + 0x800 / f);
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+ }
+
+ return true;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
@@ -2248,6 +2307,7 @@
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
@@ -2516,36 +2576,6 @@
vmx->guest_msrs[from] = tmp;
}
-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
-{
- unsigned long *msr_bitmap;
-
- if (is_guest_mode(vcpu))
- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
- else if (cpu_has_secondary_exec_ctrls() &&
- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
- else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
- }
- } else {
- if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode;
- else
- msr_bitmap = vmx_msr_bitmap_legacy;
- }
-
- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-}
-
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
@@ -2586,7 +2616,7 @@
vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(&vmx->vcpu);
+ vmx_update_msr_bitmap(&vmx->vcpu);
}
/*
@@ -2975,6 +3005,19 @@
case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(vcpu);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_arch_capabilities(vcpu))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->arch_capabilities;
+ break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -3079,6 +3122,68 @@
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibrs(vcpu))
+ return 1;
+
+ /* The STIBP bit doesn't fault even if it's not advertised */
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ return 1;
+
+ vmx->spec_ctrl = data;
+
+ if (!data)
+ break;
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging. We update the vmcs01 here for L1 as well
+ * since it will end up touching the MSR anyway now.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_RW);
+ break;
+ case MSR_IA32_PRED_CMD:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_ibpb(vcpu))
+ return 1;
+
+ if (data & ~PRED_CMD_IBPB)
+ return 1;
+
+ if (!data)
+ break;
+
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+
+ /*
+ * For non-nested:
+ * When it's written (to non-zero) for the first time, pass
+ * it through.
+ *
+ * For nested:
+ * The handling of the MSR bitmap for L2 guests is done in
+ * nested_vmx_merge_msr_bitmap. We should not touch the
+ * vmcs02.msr_bitmap here since it gets completely overwritten
+ * in the merging.
+ */
+ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+ break;
+ case MSR_IA32_ARCH_CAPABILITIES:
+ if (!msr_info->host_initiated)
+ return 1;
+ vmx->arch_capabilities = data;
+ break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -3518,11 +3623,6 @@
return vmcs;
}
-static struct vmcs *alloc_vmcs(void)
-{
- return alloc_vmcs_cpu(raw_smp_processor_id());
-}
-
static void free_vmcs(struct vmcs *vmcs)
{
free_pages((unsigned long)vmcs, vmcs_config.order);
@@ -3538,9 +3638,38 @@
loaded_vmcs_clear(loaded_vmcs);
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
+ if (loaded_vmcs->msr_bitmap)
+ free_page((unsigned long)loaded_vmcs->msr_bitmap);
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
}
+static struct vmcs *alloc_vmcs(void)
+{
+ return alloc_vmcs_cpu(raw_smp_processor_id());
+}
+
+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+ loaded_vmcs->vmcs = alloc_vmcs();
+ if (!loaded_vmcs->vmcs)
+ return -ENOMEM;
+
+ loaded_vmcs->shadow_vmcs = NULL;
+ loaded_vmcs_init(loaded_vmcs);
+
+ if (cpu_has_vmx_msr_bitmap()) {
+ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!loaded_vmcs->msr_bitmap)
+ goto out_vmcs;
+ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+ }
+ return 0;
+
+out_vmcs:
+ free_loaded_vmcs(loaded_vmcs);
+ return -ENOMEM;
+}
+
static void free_kvm_area(void)
{
int cpu;
@@ -3816,6 +3945,12 @@
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
}
+static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
+{
+ if (enable_ept)
+ vmx_flush_tlb(vcpu);
+}
+
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -4541,10 +4676,8 @@
spin_unlock(&vmx_vpid_lock);
}
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -4578,8 +4711,8 @@
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
{
int f = sizeof(unsigned long);
@@ -4613,6 +4746,15 @@
}
}
+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type, bool value)
+{
+ if (value)
+ vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+ else
+ vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+}
+
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4659,58 +4801,68 @@
}
}
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
{
- if (!longmode_only)
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
- msr, MSR_TYPE_R | MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
- msr, MSR_TYPE_R | MSR_TYPE_W);
+ u8 mode = 0;
+
+ if (cpu_has_secondary_exec_ctrls() &&
+ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+ mode |= MSR_BITMAP_MODE_X2APIC;
+ if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
+ mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+ }
+
+ if (is_long_mode(vcpu))
+ mode |= MSR_BITMAP_MODE_LM;
+
+ return mode;
}
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+ u8 mode)
{
- if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
+ int msr;
+
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+ msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ }
+
+ if (mode & MSR_BITMAP_MODE_X2APIC) {
+ /*
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
+ */
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+ }
}
}
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ u8 mode = vmx_msr_bitmap_mode(vcpu);
+ u8 changed = mode ^ vmx->msr_bitmap_mode;
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- }
+ if (!changed)
+ return;
+
+ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
+ !(mode & MSR_BITMAP_MODE_LM));
+
+ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
+ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+
+ vmx->msr_bitmap_mode = mode;
}
static bool vmx_get_enable_apicv(void)
@@ -4718,30 +4870,45 @@
return enable_apicv;
}
-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ gfn_t gfn;
+
+ /*
+ * Don't need to mark the APIC access page dirty; it is never
+ * written to by the CPU during APIC virtualization.
+ */
+
+ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
+ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+ }
+
+ if (nested_cpu_has_posted_intr(vmcs12)) {
+ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+ }
+}
+
+
+static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
void *vapic_page;
u16 status;
- if (vmx->nested.pi_desc &&
- vmx->nested.pi_pending) {
- vmx->nested.pi_pending = false;
- if (!pi_test_and_clear_on(vmx->nested.pi_desc))
- return 0;
+ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
+ return;
- max_irr = find_last_bit(
- (unsigned long *)vmx->nested.pi_desc->pir, 256);
+ vmx->nested.pi_pending = false;
+ if (!pi_test_and_clear_on(vmx->nested.pi_desc))
+ return;
- if (max_irr == 256)
- return 0;
-
+ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
+ if (max_irr != 256) {
vapic_page = kmap(vmx->nested.virtual_apic_page);
- if (!vapic_page) {
- WARN_ON(1);
- return -ENOMEM;
- }
__kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
kunmap(vmx->nested.virtual_apic_page);
@@ -4752,7 +4919,8 @@
vmcs_write16(GUEST_INTR_STATUS, status);
}
}
- return 0;
+
+ nested_mark_vmcs12_pages_dirty(vcpu);
}
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4799,14 +4967,15 @@
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /* the PIR and ON have been set by L1. */
+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
+ kvm_vcpu_kick(vcpu);
return 0;
}
return -1;
@@ -4939,7 +5108,7 @@
}
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -5028,7 +5197,7 @@
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -5102,6 +5271,8 @@
++vmx->nmsrs;
}
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
@@ -5130,6 +5301,7 @@
u64 cr0;
vmx->rmode.vm86_active = 0;
+ vmx->spec_ctrl = 0;
vmx->soft_vnmi_blocked = 0;
@@ -5174,7 +5346,7 @@
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
- vmcs_writel(GUEST_RFLAGS, 0x02);
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -5502,6 +5674,8 @@
return 1;
}
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -6235,7 +6409,7 @@
if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
return 1;
- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+ err = emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -6357,7 +6531,7 @@
static __init int hardware_setup(void)
{
- int r = -ENOMEM, i, msr;
+ int r = -ENOMEM, i;
rdmsrl_safe(MSR_EFER, &host_efer);
@@ -6372,67 +6546,33 @@
if (!vmx_io_bitmap_b)
goto out;
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
- goto out7;
+ goto out1;
vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmwrite_bitmap)
- goto out8;
+ goto out2;
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out3;
}
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
- if (!cpu_has_vmx_vpid())
+ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs)
@@ -6480,47 +6620,8 @@
kvm_tsc_scaling_ratio_frac_bits = 48;
}
- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-
- memcpy(vmx_msr_bitmap_legacy_x2apic,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
-
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- /*
- * enable_apicv && kvm_vcpu_apicv_active()
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
-
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
- /* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
- /* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
-
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -6566,22 +6667,10 @@
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
+ free_page((unsigned long)vmx_vmread_bitmap);
out1:
free_page((unsigned long)vmx_io_bitmap_b);
out:
@@ -6592,12 +6681,6 @@
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
@@ -6645,94 +6728,6 @@
}
/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmx->nested.current_vmptr) {
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
- /* Recycle the least recently used VMCS. */
- item = list_last_entry(&vmx->nested.vmcs02_pool,
- struct vmcs02_list, list);
- item->vmptr = vmx->nested.current_vmptr;
- list_move(&item->list, &vmx->nested.vmcs02_pool);
- return &item->vmcs02;
- }
-
- /* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
- if (!item)
- return NULL;
- item->vmcs02.vmcs = alloc_vmcs();
- item->vmcs02.shadow_vmcs = NULL;
- if (!item->vmcs02.vmcs) {
- kfree(item);
- return NULL;
- }
- loaded_vmcs_init(&item->vmcs02);
- item->vmptr = vmx->nested.current_vmptr;
- list_add(&(item->list), &(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num++;
- return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
- struct vmcs02_list *item;
- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
- if (item->vmptr == vmptr) {
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- return;
- }
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
- struct vmcs02_list *item, *n;
-
- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
- /*
- * Something will leak if the above WARN triggers. Better than
- * a use-after-free.
- */
- if (vmx->loaded_vmcs == &item->vmcs02)
- continue;
-
- free_loaded_vmcs(&item->vmcs02);
- list_del(&item->list);
- kfree(item);
- vmx->nested.vmcs02_num--;
- }
-}
-
-/*
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
* set the success or error code of an emulated VMX instruction, as specified
* by Vol 2B, VMX Instruction Reference, "Conventions".
@@ -7006,6 +7001,7 @@
struct vmcs *shadow_vmcs;
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ int r;
/* The Intel VMX Instruction Reference lists a bunch of bits that
* are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -7045,12 +7041,9 @@
return 1;
}
- if (cpu_has_vmx_msr_bitmap()) {
- vmx->nested.msr_bitmap =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx->nested.msr_bitmap)
- goto out_msr_bitmap;
- }
+ r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
+ if (r < 0)
+ goto out_vmcs02;
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
@@ -7067,9 +7060,6 @@
vmx->vmcs01.shadow_vmcs = shadow_vmcs;
}
- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
- vmx->nested.vmcs02_num = 0;
-
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7084,9 +7074,9 @@
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
- free_page((unsigned long)vmx->nested.msr_bitmap);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
-out_msr_bitmap:
+out_vmcs02:
return -ENOMEM;
}
@@ -7162,17 +7152,13 @@
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
- if (vmx->nested.msr_bitmap) {
- free_page((unsigned long)vmx->nested.msr_bitmap);
- vmx->nested.msr_bitmap = NULL;
- }
if (enable_shadow_vmcs) {
vmcs_clear(vmx->vmcs01.shadow_vmcs);
free_vmcs(vmx->vmcs01.shadow_vmcs);
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
- /* Unpin physical memory we referred to in current vmcs02 */
+ /* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
@@ -7188,7 +7174,7 @@
vmx->nested.pi_desc = NULL;
}
- nested_free_all_saved_vmcss(vmx);
+ free_loaded_vmcs(&vmx->nested.vmcs02);
}
/* Emulate the VMXOFF instruction */
@@ -7206,9 +7192,8 @@
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 zero = 0;
gpa_t vmptr;
- struct vmcs12 *vmcs12;
- struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -7219,24 +7204,9 @@
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- /*
- * For accurate processor emulation, VMCLEAR beyond available
- * physical memory should do nothing at all. However, it is
- * possible that a nested vmx bug, not a guest hypervisor bug,
- * resulted in this case, so let's shut down before doing any
- * more damage:
- */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 1;
- }
- vmcs12 = kmap(page);
- vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
-
- nested_free_vmcs02(vmx, vmptr);
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12, launch_state),
+ &zero, sizeof(zero));
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
@@ -8024,6 +7994,19 @@
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
KVM_ISA_VMX);
+ /*
+ * The host physical addresses of some pages of guest memory
+ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+ * Page). The CPU may write to these pages via their host
+ * physical address while L2 is running, bypassing any
+ * address-translation-based dirty tracking (e.g. EPT write
+ * protection).
+ *
+ * Mark them dirty on every exit from L2 to prevent them from
+ * getting out of sync with dirty tracking.
+ */
+ nested_mark_vmcs12_pages_dirty(vcpu);
+
if (vmx->nested.nested_run_pending)
return false;
@@ -8511,10 +8494,11 @@
} else {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmx_flush_tlb_ept_only(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
}
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -8536,8 +8520,10 @@
*/
if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(&vmx->vcpu),
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
vmcs_write64(APIC_ACCESS_ADDR, hpa);
+ vmx_flush_tlb_ept_only(vcpu);
+ }
}
static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8667,14 +8653,14 @@
#endif
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
- "call *%[entry]\n\t"
+ CALL_NOSPEC
:
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
ASM_CALL_CONSTRAINT
:
- [entry]"r"(entry),
+ THUNK_TARGET(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
@@ -8900,6 +8886,15 @@
vmx_arm_hv_timer(vcpu);
+ /*
+ * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+ * it's non-zero. Since vmentry is serialising on affected CPUs, there
+ * is no need to worry about the conditional branch over the wrmsr
+ * being speculatively taken.
+ */
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -8948,6 +8943,7 @@
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
+ "setbe %c[fail](%0)\n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -8964,12 +8960,23 @@
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
"mov %%cr2, %%" _ASM_AX " \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
+ "xor %%eax, %%eax \n\t"
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
- "setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9006,6 +9013,30 @@
#endif
);
+ /*
+ * We do not use IBRS in the kernel. If this vCPU has used the
+ * SPEC_CTRL MSR it may have left it on; save the value and
+ * turn it off. This is much more efficient than blindly adding
+ * it to the atomic save/restore list. Especially as the former
+ * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ *
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);
@@ -9116,6 +9147,7 @@
{
int err;
struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ unsigned long *msr_bitmap;
int cpu;
if (!vmx)
@@ -9148,17 +9180,24 @@
if (!vmx->guest_msrs)
goto free_pml;
- vmx->loaded_vmcs = &vmx->vmcs01;
- vmx->loaded_vmcs->vmcs = alloc_vmcs();
- vmx->loaded_vmcs->shadow_vmcs = NULL;
- if (!vmx->loaded_vmcs->vmcs)
- goto free_msrs;
if (!vmm_exclusive)
kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
- loaded_vmcs_init(vmx->loaded_vmcs);
+ err = alloc_loaded_vmcs(&vmx->vmcs01);
if (!vmm_exclusive)
kvm_cpu_vmxoff();
+ if (err < 0)
+ goto free_msrs;
+ msr_bitmap = vmx->vmcs01.msr_bitmap;
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ vmx->msr_bitmap_mode = 0;
+
+ vmx->loaded_vmcs = &vmx->vmcs01;
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
vmx->vcpu.cpu = cpu;
@@ -9552,23 +9591,31 @@
int msr;
struct page *page;
unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
+ /*
+ * pred_cmd & spec_ctrl are trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+ * from the L12 MSR bitmap that is too permissive.
+ * 2. That L1 or L2s have actually used the MSR. This avoids
+ * unnecessarily merging of the bitmap if the MSR is unused. This
+ * works properly because we only update the L01 MSR bitmap lazily.
+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+ * updated to reflect this when L1 (or its L2s) actually write to
+ * the MSR.
+ */
+ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
- /* This shortcut is ok because we support only x2APIC MSRs so far. */
- if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
+ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
+ !pred_cmd && !spec_ctrl)
return false;
page = nested_get_page(vcpu, vmcs12->msr_bitmap);
- if (!page) {
- WARN_ON(1);
+ if (!page)
return false;
- }
msr_bitmap_l1 = (unsigned long *)kmap(page);
- if (!msr_bitmap_l1) {
- nested_release_page_clean(page);
- WARN_ON(1);
- return false;
- }
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -9595,6 +9642,19 @@
MSR_TYPE_W);
}
}
+
+ if (spec_ctrl)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_SPEC_CTRL,
+ MSR_TYPE_R | MSR_TYPE_W);
+
+ if (pred_cmd)
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PRED_CMD,
+ MSR_TYPE_W);
+
kunmap(page);
nested_release_page_clean(page);
@@ -10074,6 +10134,9 @@
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12, the
@@ -10111,6 +10174,9 @@
if (nested_cpu_has_ept(vmcs12)) {
kvm_mmu_unload(vcpu);
nested_ept_init_mmu_context(vcpu);
+ } else if (nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
@@ -10166,7 +10232,6 @@
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
- struct loaded_vmcs *vmcs02;
bool ia32e;
u32 msr_entry_idx;
@@ -10306,17 +10371,13 @@
* the nested entry.
*/
- vmcs02 = nested_get_current_vmcs02(vmx);
- if (!vmcs02)
- return -ENOMEM;
-
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
cpu = get_cpu();
- vmx->loaded_vmcs = vmcs02;
+ vmx->loaded_vmcs = &vmx->nested.vmcs02;
vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu;
@@ -10468,7 +10529,8 @@
return 0;
}
- return vmx_complete_nested_posted_interrupt(vcpu);
+ vmx_complete_nested_posted_interrupt(vcpu);
+ return 0;
}
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
@@ -10779,7 +10841,7 @@
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
if (cpu_has_vmx_msr_bitmap())
- vmx_set_msr_bitmap(vcpu);
+ vmx_update_msr_bitmap(vcpu);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
@@ -10830,10 +10892,6 @@
vm_exit_controls_reset_shadow(vmx);
vmx_segment_cache_clear(vmx);
- /* if no vmcs02 cache requested, remove the one we used */
- if (VMCS02_POOL_SIZE == 0)
- nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
@@ -10851,6 +10909,10 @@
vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
vmx_set_virtual_x2apic_mode(vcpu,
vcpu->arch.apic_base & X2APIC_ENABLE);
+ } else if (!nested_cpu_has_ept(vmcs12) &&
+ nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
/* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -10900,8 +10962,10 @@
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
+ }
free_nested(to_vmx(vcpu));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 595f814..75f756e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -773,7 +773,8 @@
return 1;
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
+ !is_long_mode(vcpu))
return 1;
}
@@ -974,6 +975,7 @@
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
};
static unsigned num_msrs_to_save;
@@ -1750,10 +1752,13 @@
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ if (__this_cpu_read(cpu_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ &hv_clock.tsc_shift,
+ &hv_clock.tsc_to_system_mul);
+ ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ } else
+ ret = ktime_get_boot_ns() + ka->kvmclock_offset;
put_cpu();
@@ -1797,6 +1802,9 @@
*/
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+ if (guest_hv_clock.version & 1)
+ ++guest_hv_clock.version; /* first time write, random junk */
+
vcpu->hv_clock.version = guest_hv_clock.version + 1;
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
@@ -4260,7 +4268,7 @@
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4513,7 +4521,7 @@
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4535,14 +4543,14 @@
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5304,7 +5312,7 @@
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
- r = EMULATE_FAIL;
+ r = EMULATE_USER_EXIT;
}
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5576,6 +5584,8 @@
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
+ if (ctxt->have_exception && inject_emulated_exception(vcpu))
+ return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);
@@ -6521,6 +6531,20 @@
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
@@ -7113,7 +7137,7 @@
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_set_rflags(vcpu, regs->rflags);
+ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
@@ -8424,11 +8448,11 @@
{
struct x86_exception fault;
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 34a7413..4ad7c4d 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -25,6 +25,8 @@
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
+OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb5..46e71a7 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -156,7 +157,7 @@
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -439,7 +440,7 @@
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ JMP_NOSPEC %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 5cc78bf..3261abb 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -104,7 +104,112 @@
return 0; /* Buffer overrun */
}
+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of if it was
+ * truncated to fit in the buffer), or -1 on not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+ const char *option, char *buffer, int bufsize)
+{
+ char c;
+ int pos = 0, len = -1;
+ const char *opptr = NULL;
+ char *bufptr = buffer;
+ enum {
+ st_wordstart = 0, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ st_bufcpy, /* Copying this to buffer */
+ } state = st_wordstart;
+
+ if (!cmdline)
+ return -1; /* No command line */
+
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos++ < max_cmdline_size) {
+ c = *(char *)cmdline++;
+ if (!c)
+ break;
+
+ switch (state) {
+ case st_wordstart:
+ if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ /* fall through */
+
+ case st_wordcmp:
+ if ((c == '=') && !*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for, prepare to
+ * copy the argument.
+ */
+ len = 0;
+ bufptr = buffer;
+ state = st_bufcpy;
+ break;
+ } else if (c == *opptr++) {
+ /*
+ * We are currently matching, so continue
+ * to the next character on the cmdline.
+ */
+ break;
+ }
+ state = st_wordskip;
+ /* fall through */
+
+ case st_wordskip:
+ if (myisspace(c))
+ state = st_wordstart;
+ break;
+
+ case st_bufcpy:
+ if (myisspace(c)) {
+ state = st_wordstart;
+ } else {
+ /*
+ * Increment len, but don't overrun the
+ * supplied buffer and leave room for the
+ * NULL terminator.
+ */
+ if (++len < bufsize)
+ *bufptr++ = c;
+ }
+ break;
+ }
+ }
+
+ if (bufsize)
+ *bufptr = '\0';
+
+ return len;
+}
+
int cmdline_find_option_bool(const char *cmdline, const char *option)
{
return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
}
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+ int bufsize)
+{
+ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
+ buffer, bufsize);
+}
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 073d1f1..9758524 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -93,6 +93,13 @@
{
u64 start, end, delay, loops = __loops;
+ /*
+ * Timer value of 0 causes MWAITX to wait indefinitely, unless there
+ * is a store on the memory monitored by MONITORX.
+ */
+ if (loops == 0)
+ return;
+
start = rdtsc_ordered();
for (;;) {
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 37b62d4..b12b214 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -39,6 +39,8 @@
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@@ -53,6 +55,8 @@
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
2: movzwl -1(%_ASM_AX),%edx
xor %eax,%eax
@@ -67,6 +71,8 @@
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
3: movl -3(%_ASM_AX),%edx
xor %eax,%eax
@@ -82,6 +88,8 @@
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq -7(%_ASM_AX),%rdx
xor %eax,%eax
@@ -93,6 +101,8 @@
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user_8
+ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
+ and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl -7(%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 0000000..480edc3
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk
+
+ENTRY(__x86_indirect_thunk_\reg)
+ CFI_STARTPROC
+ JMP_NOSPEC %\reg
+ CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+.macro STUFF_RSB nr:req sp:req
+ mov $(\nr / 2), %_ASM_BX
+ .align 16
+771:
+ call 772f
+773: /* speculation trap */
+ pause
+ lfence
+ jmp 773b
+ .align 16
+772:
+ call 774f
+775: /* speculation trap */
+ pause
+ lfence
+ jmp 775b
+ .align 16
+774:
+ dec %_ASM_BX
+ jnz 771b
+ add $((BITS_PER_LONG/8) * \nr), \sp
+.endm
+
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+ENTRY(__fill_rsb)
+ STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
+ ret
+END(__fill_rsb)
+EXPORT_SYMBOL_GPL(__fill_rsb)
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+
+ENTRY(__clear_rsb)
+ STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+ ret
+END(__clear_rsb)
+EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3bc7baf..5c06dbf 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -570,12 +570,12 @@
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_to_user_ll);
@@ -627,7 +627,7 @@
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
- stac();
+ __uaccess_begin_nospec();
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
@@ -636,7 +636,7 @@
#else
__copy_user(to, from, n);
#endif
- clac();
+ __uaccess_end();
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 96d2b84..c548b46 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -37,5 +37,5 @@
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
-
+obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c228ec8..3d7f60f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -191,14 +191,15 @@
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+ u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
/* Fault not from Protection Keys: nothing to do */
- if (si_code != SEGV_PKUERR)
+ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return;
/*
* force_sig_info_fault() is called from a number of
@@ -237,7 +238,7 @@
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, pkey);
+ fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 889e761..f92bdb9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -177,7 +177,7 @@
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
- if (boot_cpu_has(X86_FEATURE_PGE)) {
+ if (boot_cpu_has(X86_FEATURE_PGE) && !kaiser_enabled) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
} else
@@ -764,13 +764,11 @@
}
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-#ifdef CONFIG_SMP
.active_mm = &init_mm,
.state = 0,
-#endif
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3e27ded..7df8e3a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -324,6 +324,16 @@
continue;
if (vaddr < (unsigned long) _text || vaddr > end)
set_pmd(pmd, __pmd(0));
+ else if (kaiser_enabled) {
+ /*
+ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
+ * clear that now. This is not important, so long as
+ * CR4.PGE remains clear, but it removes an anomaly.
+ * Physical mapping setup below avoids _PAGE_GLOBAL
+ * by use of massage_pgprot() inside pfn_pte() etc.
+ */
+ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
+ }
}
}
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
new file mode 100644
index 0000000..ec678aa
--- /dev/null
+++ b/arch/x86/mm/kaiser.c
@@ -0,0 +1,483 @@
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+
+#include <asm/kaiser.h>
+#include <asm/tlbflush.h> /* to verify its kaiser declarations */
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/desc.h>
+#include <asm/cmdline.h>
+#include <asm/vsyscall.h>
+
+int kaiser_enabled __read_mostly = 1;
+EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+
+__visible
+DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+/*
+ * These can have bit 63 set, so we can not just use a plain "or"
+ * instruction to get their value or'd into CR3. It would take
+ * another register. So, we use a memory reference to these instead.
+ *
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+/*
+ * At runtime, the only things we map are some things for CPU
+ * hotplug, and stacks for new processes. No two CPUs will ever
+ * be populating the same addresses, so we only need to ensure
+ * that we protect between two CPUs trying to allocate and
+ * populate the same page table page.
+ *
+ * Only take this lock when doing a set_p[4um]d(), but it is not
+ * needed for doing a set_pte(). We assume that only the *owner*
+ * of a given allocation will be doing this for _their_
+ * allocation.
+ *
+ * This ensures that once a system has been running for a while
+ * and there have been stacks all over and these page tables
+ * are fully populated, there will be no further acquisitions of
+ * this lock.
+ */
+static DEFINE_SPINLOCK(shadow_table_allocation_lock);
+
+/*
+ * Returns -1 on error.
+ */
+static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(vaddr);
+ /*
+ * We made all the kernel PGDs present in kaiser_init().
+ * We expect them to stay that way.
+ */
+ BUG_ON(pgd_none(*pgd));
+ /*
+ * PGDs are either 512GB or 128TB on all x86_64
+ * configurations. We don't handle these.
+ */
+ BUG_ON(pgd_large(*pgd));
+
+ pud = pud_offset(pgd, vaddr);
+ if (pud_none(*pud)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (pud_large(*pud))
+ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
+
+ pmd = pmd_offset(pud, vaddr);
+ if (pmd_none(*pmd)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (pmd_large(*pmd))
+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
+
+ pte = pte_offset_kernel(pmd, vaddr);
+ if (pte_none(*pte)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+}
+
+/*
+ * This is a relatively normal page table walk, except that it
+ * also tries to allocate page tables pages along the way.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
+{
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ unsigned long prot = _KERNPG_TABLE;
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+ return NULL;
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ if (user) {
+ /*
+ * The vsyscall page is the only page that will have
+ * _PAGE_USER set. Catch everything else.
+ */
+ BUG_ON(address != VSYSCALL_ADDR);
+
+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+ prot = _PAGE_TABLE;
+ }
+
+ pud = pud_offset(pgd, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(prot | __pa(new_pmd_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pmd_page), NR_KAISERTABLE);
+ } else
+ free_page(new_pmd_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+
+ pmd = pmd_offset(pud, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pmd_large(*pmd)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pmd_none(*pmd)) {
+ unsigned long new_pte_page = __get_free_page(gfp);
+ if (!new_pte_page)
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pte_page), NR_KAISERTABLE);
+ } else
+ free_page(new_pte_page);
+ spin_unlock(&shadow_table_allocation_lock);
+ }
+
+ return pte_offset_kernel(pmd, address);
+}
+
+static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+ unsigned long flags)
+{
+ int ret = 0;
+ pte_t *pte;
+ unsigned long start_addr = (unsigned long )__start_addr;
+ unsigned long address = start_addr & PAGE_MASK;
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
+ /*
+ * It is convenient for callers to pass in __PAGE_KERNEL etc,
+ * and there is no actual harm from setting _PAGE_GLOBAL, so
+ * long as CR4.PGE is not set. But it is nonetheless troubling
+ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
+ * requires that not to be #defined to 0): so mask it off here.
+ */
+ flags &= ~_PAGE_GLOBAL;
+ if (!(__supported_pte_mask & _PAGE_NX))
+ flags &= ~_PAGE_NX;
+
+ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+ ret = -EIO;
+ break;
+ }
+ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
+ }
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
+ pte_t tmp;
+ set_pte(&tmp, __pte(flags | target_address));
+ WARN_ON_ONCE(!pte_same(*pte, tmp));
+ }
+ }
+ return ret;
+}
+
+static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
+{
+ unsigned long size = end - start;
+
+ return kaiser_add_user_map(start, size, flags);
+}
+
+/*
+ * Ensure that the top level of the (shadow) page tables are
+ * entirely populated. This ensures that all processes that get
+ * forked have the same entries. This way, we do not have to
+ * ever go set up new entries in older processes.
+ *
+ * Note: we never free these, so there are no updates to them
+ * after this.
+ */
+static void __init kaiser_init_all_pgds(void)
+{
+ pgd_t *pgd;
+ int i = 0;
+
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ pgd_t new_pgd;
+ pud_t *pud = pud_alloc_one(&init_mm,
+ PAGE_OFFSET + i * PGDIR_SIZE);
+ if (!pud) {
+ WARN_ON(1);
+ break;
+ }
+ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
+ new_pgd = __pgd(_KERNPG_TABLE |__pa(pud));
+ /*
+ * Make sure not to stomp on some other pgd entry.
+ */
+ if (!pgd_none(pgd[i])) {
+ WARN_ON(1);
+ continue;
+ }
+ set_pgd(pgd + i, new_pgd);
+ }
+}
+
+#define kaiser_add_user_map_early(start, size, flags) do { \
+ int __ret = kaiser_add_user_map(start, size, flags); \
+ WARN_ON(__ret); \
+} while (0)
+
+#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \
+ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \
+ WARN_ON(__ret); \
+} while (0)
+
+void __init kaiser_check_boottime_disable(void)
+{
+ bool enable = true;
+ char arg[5];
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_XENPV))
+ goto silent_disable;
+
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (!strncmp(arg, "on", 2))
+ goto enable;
+
+ if (!strncmp(arg, "off", 3))
+ goto disable;
+
+ if (!strncmp(arg, "auto", 4))
+ goto skip;
+ }
+
+ if (cmdline_find_option_bool(boot_command_line, "nopti"))
+ goto disable;
+
+skip:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ goto disable;
+
+enable:
+ if (enable)
+ setup_force_cpu_cap(X86_FEATURE_KAISER);
+
+ return;
+
+disable:
+ pr_info("disabled\n");
+
+silent_disable:
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+}
+
+/*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+ * will have most of the kernel up by then and should be able to
+ * get a clean warning out of it. If we BUG_ON() here, we run
+ * the risk of being before we have good console output.
+ */
+void __init kaiser_init(void)
+{
+ int cpu;
+
+ if (!kaiser_enabled)
+ return;
+
+ kaiser_init_all_pgds();
+
+ /*
+ * Note that this sets _PAGE_USER and it needs to happen when the
+ * pagetable hierarchy gets created, i.e., early. Otherwise
+ * kaiser_pagetable_walk() will encounter initialized PTEs in the
+ * hierarchy and not set the proper permissions, leading to the
+ * pagefaults with page-protection violations when trying to read the
+ * vsyscall page. For example.
+ */
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
+ PAGE_SIZE,
+ vsyscall_pgprot);
+
+ for_each_possible_cpu(cpu) {
+ void *percpu_vaddr = __per_cpu_user_mapped_start +
+ per_cpu_offset(cpu);
+ unsigned long percpu_sz = __per_cpu_user_mapped_end -
+ __per_cpu_user_mapped_start;
+ kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
+ __PAGE_KERNEL);
+ }
+
+ /*
+ * Map the entry/exit text section, which is needed at
+ * switches from user to and from kernel.
+ */
+ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
+ __PAGE_KERNEL_RX);
+
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+ kaiser_add_user_map_ptrs_early(__irqentry_text_start,
+ __irqentry_text_end,
+ __PAGE_KERNEL_RX);
+#endif
+ kaiser_add_user_map_early((void *)idt_descr.address,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL_RO);
+#ifdef CONFIG_TRACING
+ kaiser_add_user_map_early(&trace_idt_descr,
+ sizeof(trace_idt_descr),
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&trace_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+#endif
+ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&debug_idt_table,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+
+ pr_info("enabled\n");
+}
+
+/* Add a mapping to the shadow mapping, and synchronize the mappings */
+int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+{
+ if (!kaiser_enabled)
+ return 0;
+ return kaiser_add_user_map((const void *)addr, size, flags);
+}
+
+void kaiser_remove_mapping(unsigned long start, unsigned long size)
+{
+ extern void unmap_pud_range_nofree(pgd_t *pgd,
+ unsigned long start, unsigned long end);
+ unsigned long end = start + size;
+ unsigned long addr, next;
+ pgd_t *pgd;
+
+ if (!kaiser_enabled)
+ return;
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+ unmap_pud_range_nofree(pgd, addr, next);
+ }
+}
+
+/*
+ * Page table pages are page-aligned. The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ * This returns true for user pages that need to get copied into
+ * both the user and kernel copies of the page tables, and false
+ * for kernel pages that should only be in the kernel copy.
+ */
+static inline bool is_userspace_pgd(pgd_t *pgdp)
+{
+ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
+}
+
+pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (!kaiser_enabled)
+ return pgd;
+ /*
+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
+ * skip cases like kexec and EFI which make temporary low mappings.
+ */
+ if (pgd.pgd & _PAGE_USER) {
+ if (is_userspace_pgd(pgdp)) {
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+ /*
+ * Even if the entry is *mapping* userspace, ensure
+ * that userspace can not use it. This way, if we
+ * get out to userspace running on the kernel CR3,
+ * userspace will crash instead of running.
+ */
+ if (__supported_pte_mask & _PAGE_NX)
+ pgd.pgd |= _PAGE_NX;
+ }
+ } else if (!pgd.pgd) {
+ /*
+ * pgd_clear() cannot check _PAGE_USER, and is even used to
+ * clear corrupted pgd entries: so just rely on cases like
+ * kexec and EFI never to be using pgd_clear().
+ */
+ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
+ is_userspace_pgd(pgdp))
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+ }
+ return pgd;
+}
+
+void kaiser_setup_pcid(void)
+{
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+ if (this_cpu_has(X86_FEATURE_PCID))
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+}
+
+/*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+ */
+void kaiser_flush_tlb_on_return_to_user(void)
+{
+ if (this_cpu_has(X86_FEATURE_PCID))
+ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+}
+EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aed2064..319183d 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -189,6 +189,6 @@
*pud_tramp = *pud;
}
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ /* Avoid set_pgd(), in case it's complicated by CONFIG_PAGE_TABLE_ISOLATION */
+ trampoline_pgd_entry = __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e3353c9..73dcb0e1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -52,6 +52,7 @@
#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
+#define CPA_FREE_PAGETABLES 8
#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -729,10 +730,13 @@
return 0;
}
-static bool try_to_free_pte_page(pte_t *pte)
+static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
{
int i;
+ if (!(cpa->flags & CPA_FREE_PAGETABLES))
+ return false;
+
for (i = 0; i < PTRS_PER_PTE; i++)
if (!pte_none(pte[i]))
return false;
@@ -741,10 +745,13 @@
return true;
}
-static bool try_to_free_pmd_page(pmd_t *pmd)
+static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
{
int i;
+ if (!(cpa->flags & CPA_FREE_PAGETABLES))
+ return false;
+
for (i = 0; i < PTRS_PER_PMD; i++)
if (!pmd_none(pmd[i]))
return false;
@@ -753,7 +760,9 @@
return true;
}
-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
+ unsigned long start,
+ unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, start);
@@ -764,22 +773,23 @@
pte++;
}
- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
pmd_clear(pmd);
return true;
}
return false;
}
-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
+static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
unsigned long start, unsigned long end)
{
- if (unmap_pte_range(pmd, start, end))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (unmap_pte_range(cpa, pmd, start, end))
+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
+static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
+ unsigned long start, unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, start);
@@ -790,7 +800,7 @@
unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
- __unmap_pmd_range(pud, pmd, start, pre_end);
+ __unmap_pmd_range(cpa, pud, pmd, start, pre_end);
start = pre_end;
pmd++;
@@ -803,7 +813,8 @@
if (pmd_large(*pmd))
pmd_clear(pmd);
else
- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
+ __unmap_pmd_range(cpa, pud, pmd,
+ start, start + PMD_SIZE);
start += PMD_SIZE;
pmd++;
@@ -813,17 +824,19 @@
* 4K leftovers?
*/
if (start < end)
- return __unmap_pmd_range(pud, pmd, start, end);
+ return __unmap_pmd_range(cpa, pud, pmd, start, end);
/*
* Try again to free the PMD page if haven't succeeded above.
*/
if (!pud_none(*pud))
- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
+ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
+ unsigned long start,
+ unsigned long end)
{
pud_t *pud = pud_offset(pgd, start);
@@ -834,7 +847,7 @@
unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
- unmap_pmd_range(pud, start, pre_end);
+ unmap_pmd_range(cpa, pud, start, pre_end);
start = pre_end;
pud++;
@@ -848,7 +861,7 @@
if (pud_large(*pud))
pud_clear(pud);
else
- unmap_pmd_range(pud, start, start + PUD_SIZE);
+ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
start += PUD_SIZE;
pud++;
@@ -858,7 +871,7 @@
* 2M leftovers?
*/
if (start < end)
- unmap_pmd_range(pud, start, end);
+ unmap_pmd_range(cpa, pud, start, end);
/*
* No need to try to free the PUD page because we'll free it in
@@ -866,6 +879,24 @@
*/
}
+static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+ struct cpa_data cpa = {
+ .flags = CPA_FREE_PAGETABLES,
+ };
+
+ __unmap_pud_range(&cpa, pgd, start, end);
+}
+
+void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+ struct cpa_data cpa = {
+ .flags = 0,
+ };
+
+ __unmap_pud_range(&cpa, pgd, start, end);
+}
+
static int alloc_pte_page(pmd_t *pmd)
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5a..209b946 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -344,14 +344,15 @@
kmem_cache_free(pgd_cache, pgd);
}
#else
+
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
{
- free_page((unsigned long)pgd);
+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 75fb011..578973a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,16 +6,17 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
+#include <asm/kaiser.h>
/*
- * Smarter SMP flushing macros.
+ * TLB flushing, formerly SMP-only
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
@@ -28,14 +29,42 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
-#ifdef CONFIG_SMP
-
struct flush_tlb_info {
struct mm_struct *flush_mm;
unsigned long flush_start;
unsigned long flush_end;
};
+static void load_new_mm_cr3(pgd_t *pgdir)
+{
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+ if (kaiser_enabled) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+ * Flush KERN below, flush USER when returning to userspace in
+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
+ *
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
+ * would be needed in the write_cr3() below - if PCIDs enabled.
+ */
+ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ kaiser_flush_tlb_on_return_to_user();
+ }
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
@@ -47,7 +76,7 @@
BUG();
if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
- load_cr3(swapper_pg_dir);
+ load_new_mm_cr3(swapper_pg_dir);
/*
* This gets called in the idle path where RCU
* functions differently. Tracing normally
@@ -59,8 +88,6 @@
}
EXPORT_SYMBOL_GPL(leave_mm);
-#endif /* CONFIG_SMP */
-
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -83,7 +110,7 @@
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+ unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
pgd_t *pgd = next->pgd + stack_pgd_index;
@@ -91,10 +118,8 @@
set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
}
-#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
@@ -126,7 +151,7 @@
* ordering guarantee we need.
*
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
@@ -152,9 +177,7 @@
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
#endif
- }
-#ifdef CONFIG_SMP
- else {
+ } else {
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
@@ -175,17 +198,14 @@
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);
}
}
-#endif
}
-#ifdef CONFIG_SMP
-
/*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
@@ -287,23 +307,6 @@
smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
}
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
-
- preempt_disable();
-
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-
- /* This is an implicit full barrier that synchronizes with switch_mm. */
- local_flush_tlb();
-
- trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
- preempt_enable();
-}
-
/*
* See Documentation/x86/tlb.txt for details. We choose 33
* because it is large enough to cover the vast majority (at
@@ -324,6 +327,12 @@
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
preempt_disable();
+
+ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+ if (base_pages_to_flush > tlb_single_page_flush_ceiling)
+ base_pages_to_flush = TLB_FLUSH_ALL;
+
if (current->active_mm != mm) {
/* Synchronize with switch_mm. */
smp_mb();
@@ -340,15 +349,11 @@
goto out;
}
- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
-
/*
* Both branches below are implicit full barriers (MOV to CR or
* INVLPG) that synchronize with switch_mm.
*/
- if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
- base_pages_to_flush = TLB_FLUSH_ALL;
+ if (base_pages_to_flush == TLB_FLUSH_ALL) {
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
local_flush_tlb();
} else {
@@ -369,33 +374,6 @@
preempt_enable();
}
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
-{
- struct mm_struct *mm = vma->vm_mm;
-
- preempt_disable();
-
- if (current->active_mm == mm) {
- if (current->mm) {
- /*
- * Implicit full barrier (INVLPG) that synchronizes
- * with switch_mm.
- */
- __flush_tlb_one(start);
- } else {
- leave_mm(smp_processor_id());
-
- /* Synchronize with switch_mm. */
- smp_mb();
- }
- }
-
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
-
- preempt_enable();
-}
-
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -480,5 +458,3 @@
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
-
-#endif /* CONFIG_SMP */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 15f7436..7840331 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -278,10 +278,10 @@
/* if (index >= array->map.max_entries)
* goto out;
*/
- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -290,21 +290,20 @@
*/
EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
/* if (prog == NULL)
* goto out;
*/
- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
+ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 10
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cf..526536c 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 2f25a36..dcb2d9d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -142,7 +142,7 @@
return 0;
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
return -ENOMEM;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 9e42842..0f017518 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1848,7 +1848,6 @@
ops.write_payload_first(pnode, first);
ops.write_payload_last(pnode, last);
- ops.write_g_sw_ack(pnode, 0xffffUL);
/* in effect, all msg_type's are set to MSG_NOOP */
memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb..3ee234b6 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@
mm->arch.ldt.entry_count = 0;
}
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8f1f7ef..2bea87c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -444,6 +444,12 @@
~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */
+
if (!xen_initial_domain())
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index b39531b..72bfc1c 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -109,7 +109,6 @@
u32 oldval, u32 newval)
{
int ret = 0;
- u32 prev;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
@@ -120,26 +119,24 @@
__asm__ __volatile__ (
" # futex_atomic_cmpxchg_inatomic\n"
- "1: l32i %1, %3, 0\n"
- " mov %0, %5\n"
- " wsr %1, scompare1\n"
- "2: s32c1i %0, %3, 0\n"
- "3:\n"
+ " wsr %5, scompare1\n"
+ "1: s32c1i %1, %4, 0\n"
+ " s32i %1, %6, 0\n"
+ "2:\n"
" .section .fixup,\"ax\"\n"
" .align 4\n"
- "4: .long 3b\n"
- "5: l32r %1, 4b\n"
- " movi %0, %6\n"
+ "3: .long 2b\n"
+ "4: l32r %1, 3b\n"
+ " movi %0, %7\n"
" jx %1\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- " .long 1b,5b,2b,5b\n"
+ " .long 1b,4b\n"
" .previous\n"
- : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
- : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT)
+ : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval)
+ : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT)
: "memory");
- *uval = prev;
return ret;
}
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef2..2fe6c11 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -178,7 +178,7 @@
if (bb->shift < 0)
/* badblocks are disabled */
- return 0;
+ return 1;
if (bb->shift) {
/* round the start down, and the end up */
diff --git a/block/blk-core.c b/block/blk-core.c
index 1e63ee1..b2e5764 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -529,8 +529,8 @@
blk_queue_for_each_rl(rl, q) {
if (rl->rq_pool) {
- wake_up(&rl->wait[BLK_RW_SYNC]);
- wake_up(&rl->wait[BLK_RW_ASYNC]);
+ wake_up_all(&rl->wait[BLK_RW_SYNC]);
+ wake_up_all(&rl->wait[BLK_RW_ASYNC]);
}
}
}
@@ -3583,76 +3583,43 @@
* TODO : If necessary, we can make the histograms per-cpu and aggregate
* them when printing them out.
*/
-void
-blk_zero_latency_hist(struct io_latency_state *s)
-{
- memset(s->latency_y_axis_read, 0,
- sizeof(s->latency_y_axis_read));
- memset(s->latency_y_axis_write, 0,
- sizeof(s->latency_y_axis_write));
- s->latency_reads_elems = 0;
- s->latency_writes_elems = 0;
-}
-EXPORT_SYMBOL(blk_zero_latency_hist);
-
ssize_t
-blk_latency_hist_show(struct io_latency_state *s, char *buf)
+blk_latency_hist_show(char* name, struct io_latency_state *s, char *buf,
+ int buf_size)
{
int i;
int bytes_written = 0;
u_int64_t num_elem, elem;
int pct;
+ u_int64_t average;
- num_elem = s->latency_reads_elems;
- if (num_elem > 0) {
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "IO svc_time Read Latency Histogram (n = %llu):\n",
- num_elem);
- for (i = 0;
- i < ARRAY_SIZE(latency_x_axis_us);
- i++) {
- elem = s->latency_y_axis_read[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t< %5lluus%15llu%15d%%\n",
- latency_x_axis_us[i],
- elem, pct);
- }
- /* Last element in y-axis table is overflow */
- elem = s->latency_y_axis_read[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t> %5dms%15llu%15d%%\n", 10,
- elem, pct);
+ num_elem = s->latency_elems;
+ if (num_elem > 0) {
+ average = div64_u64(s->latency_sum, s->latency_elems);
+ bytes_written += scnprintf(buf + bytes_written,
+ buf_size - bytes_written,
+ "IO svc_time %s Latency Histogram (n = %llu,"
+ " average = %llu):\n", name, num_elem, average);
+ for (i = 0;
+ i < ARRAY_SIZE(latency_x_axis_us);
+ i++) {
+ elem = s->latency_y_axis[i];
+ pct = div64_u64(elem * 100, num_elem);
+ bytes_written += scnprintf(buf + bytes_written,
+ PAGE_SIZE - bytes_written,
+ "\t< %6lluus%15llu%15d%%\n",
+ latency_x_axis_us[i],
+ elem, pct);
+ }
+ /* Last element in y-axis table is overflow */
+ elem = s->latency_y_axis[i];
+ pct = div64_u64(elem * 100, num_elem);
+ bytes_written += scnprintf(buf + bytes_written,
+ PAGE_SIZE - bytes_written,
+ "\t>=%6lluus%15llu%15d%%\n",
+ latency_x_axis_us[i - 1], elem, pct);
}
- num_elem = s->latency_writes_elems;
- if (num_elem > 0) {
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "IO svc_time Write Latency Histogram (n = %llu):\n",
- num_elem);
- for (i = 0;
- i < ARRAY_SIZE(latency_x_axis_us);
- i++) {
- elem = s->latency_y_axis_write[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t< %5lluus%15llu%15d%%\n",
- latency_x_axis_us[i],
- elem, pct);
- }
- /* Last element in y-axis table is overflow */
- elem = s->latency_y_axis_write[i];
- pct = div64_u64(elem * 100, num_elem);
- bytes_written += scnprintf(buf + bytes_written,
- PAGE_SIZE - bytes_written,
- "\t> %5dms%15llu%15d%%\n", 10,
- elem, pct);
- }
+
return bytes_written;
}
EXPORT_SYMBOL(blk_latency_hist_show);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 01fb455..8c0894e 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -429,7 +429,7 @@
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
}
-static void blk_mq_sysfs_init(struct request_queue *q)
+void blk_mq_sysfs_init(struct request_queue *q)
{
struct blk_mq_ctx *ctx;
int cpu;
@@ -449,8 +449,6 @@
blk_mq_disable_hotplug();
- blk_mq_sysfs_init(q);
-
ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
goto out;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index dcf5ce3..4bc701b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -311,6 +311,9 @@
for (i = 0; i < set->nr_hw_queues; i++) {
struct blk_mq_tags *tags = set->tags[i];
+ if (!tags)
+ continue;
+
for (j = 0; j < tags->nr_tags; j++) {
if (!tags->rqs[j])
continue;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7b597ec..10f8f94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1707,7 +1707,6 @@
struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
struct blk_mq_hw_ctx *hctx;
- memset(__ctx, 0, sizeof(*__ctx));
__ctx->cpu = i;
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
@@ -1970,6 +1969,9 @@
if (!q->queue_ctx)
goto err_exit;
+ /* init q->mq_kobj and sw queues' kobjects */
+ blk_mq_sysfs_init(q);
+
q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e5d2524..c55bcf6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -50,6 +50,7 @@
/*
* sysfs helpers
*/
+extern void blk_mq_sysfs_init(struct request_queue *q);
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 84d7148..ab0d93a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -120,7 +120,7 @@
config CRYPTO_ECDH
tristate "ECDH algorithm"
- select CRYTPO_KPP
+ select CRYPTO_KPP
help
Generic implementation of the ECDH algorithm
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index f5e18c2..ca50eeb 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -149,7 +149,7 @@
static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- const u32 forbidden = CRYPTO_ALG_INTERNAL;
+ const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY;
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct sockaddr_alg *sa = (void *)uaddr;
@@ -157,6 +157,10 @@
void *private;
int err;
+ /* If caller uses non-allowed flag, return error. */
+ if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
+ return -EINVAL;
+
if (sock->state == SS_CONNECTED)
return -EINVAL;
@@ -175,9 +179,7 @@
if (IS_ERR(type))
return PTR_ERR(type);
- private = type->bind(sa->salg_name,
- sa->salg_feat & ~forbidden,
- sa->salg_mask & ~forbidden);
+ private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
if (IS_ERR(private)) {
module_put(type->owner);
return PTR_ERR(private);
diff --git a/crypto/ahash.c b/crypto/ahash.c
index cce0268..f3fa104 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -625,5 +625,16 @@
}
EXPORT_SYMBOL_GPL(ahash_attr_alg);
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
+{
+ struct crypto_alg *alg = &halg->base;
+
+ if (alg->cra_type != &crypto_ahash_type)
+ return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg));
+
+ return __crypto_ahash_alg(alg)->setkey != NULL;
+}
+EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 1fad2a6..5c098ff 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -167,6 +167,18 @@
spawn->alg = NULL;
spawns = &inst->alg.cra_users;
+
+ /*
+ * We may encounter an unregistered instance here, since
+ * an instance's spawns are set up prior to the instance
+ * being registered. An unregistered instance will have
+ * NULL ->cra_users.next, since ->cra_users isn't
+ * properly initialized until registration. But an
+ * unregistered instance cannot have any users, so treat
+ * it the same as ->cra_users being empty.
+ */
+ if (spawns->next == NULL)
+ break;
}
} while ((spawns = crypto_more_spawns(alg, &stack, &top,
&secondary_spawns)));
diff --git a/crypto/api.c b/crypto/api.c
index bbc147c..e5c1abf 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -24,6 +24,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/completion.h>
#include "internal.h"
LIST_HEAD(crypto_alg_list);
@@ -611,5 +612,17 @@
}
EXPORT_SYMBOL_GPL(crypto_has_alg);
+void crypto_req_done(struct crypto_async_request *req, int err)
+{
+ struct crypto_wait *wait = req->data;
+
+ if (err == -EINPROGRESS)
+ return;
+
+ wait->err = err;
+ complete(&wait->completion);
+}
+EXPORT_SYMBOL_GPL(crypto_req_done);
+
MODULE_DESCRIPTION("Cryptographic core API");
MODULE_LICENSE("GPL");
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 2ffd697..5a37962 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -150,7 +150,7 @@
pr_devel("Sig %u: Found cert serial match X.509[%u]\n",
sinfo->index, certix);
- if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) {
+ if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) {
pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n",
sinfo->index);
continue;
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index c80765b..029f705 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -408,6 +408,8 @@
ctx->cert->pub->pkey_algo = "rsa";
/* Discard the BIT STRING metadata */
+ if (vlen < 1 || *(const u8 *)value != 0)
+ return -EBADMSG;
ctx->key = value + 1;
ctx->key_size = vlen - 1;
return 0;
diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c
index fb73229..e16009a 100644
--- a/crypto/asymmetric_keys/x509_public_key.c
+++ b/crypto/asymmetric_keys/x509_public_key.c
@@ -125,7 +125,7 @@
}
ret = -EKEYREJECTED;
- if (cert->pub->pkey_algo != cert->sig->pkey_algo)
+ if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0)
goto out;
ret = public_key_verify_signature(cert->pub, cert->sig);
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index e899ef5..cb1c3a3 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -610,6 +610,11 @@
algt->mask));
if (IS_ERR(poly))
return PTR_ERR(poly);
+ poly_hash = __crypto_hash_alg_common(poly);
+
+ err = -EINVAL;
+ if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+ goto out_put_poly;
err = -ENOMEM;
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -618,7 +623,6 @@
ctx = aead_instance_ctx(inst);
ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
- poly_hash = __crypto_hash_alg_common(poly);
err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
aead_crypto_instance(inst));
if (err)
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 0c654e5..af9ad45 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -691,7 +691,8 @@
inst->alg.finup = cryptd_hash_finup_enqueue;
inst->alg.export = cryptd_hash_export;
inst->alg.import = cryptd_hash_import;
- inst->alg.setkey = cryptd_hash_setkey;
+ if (crypto_shash_alg_has_setkey(salg))
+ inst->alg.setkey = cryptd_hash_setkey;
inst->alg.digest = cryptd_hash_digest_enqueue;
err = ahash_register_instance(tmpl, inst);
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 72e38c0..ba07fb6 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -194,11 +194,15 @@
salg = shash_attr_alg(tb[1], 0, 0);
if (IS_ERR(salg))
return PTR_ERR(salg);
+ alg = &salg->base;
+ /* The underlying hash algorithm must be unkeyed */
err = -EINVAL;
+ if (crypto_shash_alg_has_setkey(salg))
+ goto out_put_alg;
+
ds = salg->digestsize;
ss = salg->statesize;
- alg = &salg->base;
if (ds > alg->cra_blocksize ||
ss < alg->cra_blocksize)
goto out_put_alg;
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index c207458..6e9389c 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -80,6 +80,7 @@
pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+ spin_lock_init(&cpu_queue->q_lock);
}
return 0;
}
@@ -103,15 +104,16 @@
int cpu, err;
struct mcryptd_cpu_queue *cpu_queue;
- cpu = get_cpu();
- cpu_queue = this_cpu_ptr(queue->cpu_queue);
- rctx->tag.cpu = cpu;
+ cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+ spin_lock(&cpu_queue->q_lock);
+ cpu = smp_processor_id();
+ rctx->tag.cpu = smp_processor_id();
err = crypto_enqueue_request(&cpu_queue->queue, request);
pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
cpu, cpu_queue, request);
+ spin_unlock(&cpu_queue->q_lock);
queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
- put_cpu();
return err;
}
@@ -160,16 +162,11 @@
cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
i = 0;
while (i < MCRYPTD_BATCH || single_task_running()) {
- /*
- * preempt_disable/enable is used to prevent
- * being preempted by mcryptd_enqueue_request()
- */
- local_bh_disable();
- preempt_disable();
+
+ spin_lock_bh(&cpu_queue->q_lock);
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
- preempt_enable();
- local_bh_enable();
+ spin_unlock_bh(&cpu_queue->q_lock);
if (!req) {
mcryptd_opportunistic_flush();
@@ -184,7 +181,7 @@
++i;
}
if (cpu_queue->queue.qlen)
- queue_work(kcrypto_wq, &cpu_queue->work);
+ queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
}
void mcryptd_flusher(struct work_struct *__work)
@@ -537,7 +534,8 @@
inst->alg.finup = mcryptd_hash_finup_enqueue;
inst->alg.export = mcryptd_hash_export;
inst->alg.import = mcryptd_hash_import;
- inst->alg.setkey = mcryptd_hash_setkey;
+ if (crypto_hash_alg_has_setkey(halg))
+ inst->alg.setkey = mcryptd_hash_setkey;
inst->alg.digest = mcryptd_hash_digest_enqueue;
err = ahash_register_instance(tmpl, inst);
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index ee9cfb9..f8ec3d4 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -254,6 +254,14 @@
crypto_free_aead(ctx->child);
}
+static void pcrypt_free(struct aead_instance *inst)
+{
+ struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+ crypto_drop_aead(&ctx->spawn);
+ kfree(inst);
+}
+
static int pcrypt_init_instance(struct crypto_instance *inst,
struct crypto_alg *alg)
{
@@ -319,6 +327,8 @@
inst->alg.encrypt = pcrypt_aead_encrypt;
inst->alg.decrypt = pcrypt_aead_decrypt;
+ inst->free = pcrypt_free;
+
err = aead_register_instance(tmpl, inst);
if (err)
goto out_drop_aead;
@@ -349,14 +359,6 @@
return -EINVAL;
}
-static void pcrypt_free(struct crypto_instance *inst)
-{
- struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
- crypto_drop_aead(&ctx->spawn);
- kfree(inst);
-}
-
static int pcrypt_cpumask_change_notify(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -469,7 +471,6 @@
static struct crypto_template pcrypt_tmpl = {
.name = "pcrypt",
.create = pcrypt_create,
- .free = pcrypt_free,
.module = THIS_MODULE,
};
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 2df9835d..bca9923 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -51,17 +51,6 @@
}
EXPORT_SYMBOL_GPL(crypto_poly1305_init);
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- /* Poly1305 requires a unique key for each tag, which implies that
- * we can't set it on the tfm that gets accessed by multiple users
- * simultaneously. Instead we expect the key as the first 32 bytes in
- * the update() call. */
- return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
-
static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
@@ -80,6 +69,11 @@
dctx->s[3] = le32_to_cpuvp(key + 12);
}
+/*
+ * Poly1305 requires a unique key for each tag, which implies that we can't set
+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
+ * expect the key as the first 32 bytes in the update() call.
+ */
unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
@@ -285,7 +279,6 @@
.init = crypto_poly1305_init,
.update = crypto_poly1305_update,
.final = crypto_poly1305_final,
- .setkey = crypto_poly1305_setkey,
.descsize = sizeof(struct poly1305_desc_ctx),
.base = {
.cra_name = "poly1305",
diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c
index 0b66dc8..cad395d 100644
--- a/crypto/rsa_helper.c
+++ b/crypto/rsa_helper.c
@@ -30,7 +30,7 @@
return -EINVAL;
if (fips_enabled) {
- while (!*ptr && n_sz) {
+ while (n_sz && !*ptr) {
ptr++;
n_sz--;
}
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index f550b5d..d7da0ee 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -188,13 +188,6 @@
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
walk.src.virt.addr,
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 7e8ed96..a68be62 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -18,6 +18,7 @@
#include <linux/types.h>
#include <crypto/sha3.h>
#include <asm/byteorder.h>
+#include <asm/unaligned.h>
#define KECCAK_ROUNDS 24
@@ -149,7 +150,7 @@
unsigned int i;
for (i = 0; i < sctx->rsizw; i++)
- sctx->st[i] ^= ((u64 *) src)[i];
+ sctx->st[i] ^= get_unaligned_le64(src + 8 * i);
keccakf(sctx->st);
done += sctx->rsiz;
@@ -174,7 +175,7 @@
sctx->buf[sctx->rsiz - 1] |= 0x80;
for (i = 0; i < sctx->rsizw; i++)
- sctx->st[i] ^= ((u64 *) sctx->buf)[i];
+ sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i);
keccakf(sctx->st);
diff --git a/crypto/shash.c b/crypto/shash.c
index 4d8a671..9bd5044 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -24,11 +24,12 @@
static const struct crypto_type crypto_shash_type;
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
{
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(shash_no_setkey);
static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index ae22f05..2a07341 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -223,11 +223,13 @@
}
sg_init_table(sg, np + 1);
- np--;
+ if (rem)
+ np--;
for (k = 0; k < np; k++)
sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
- sg_set_buf(&sg[k + 1], xbuf[k], rem);
+ if (rem)
+ sg_set_buf(&sg[k + 1], xbuf[k], rem);
}
static void test_aead_speed(const char *algo, int enc, unsigned int secs,
@@ -342,7 +344,7 @@
}
sg_init_aead(sg, xbuf,
- *b_size + (enc ? authsize : 0));
+ *b_size + (enc ? 0 : authsize));
sg_init_aead(sgout, xoutbuf,
*b_size + (enc ? authsize : 0));
@@ -350,7 +352,9 @@
sg_set_buf(&sg[0], assoc, aad_size);
sg_set_buf(&sgout[0], assoc, aad_size);
- aead_request_set_crypt(req, sg, sgout, *b_size, iv);
+ aead_request_set_crypt(req, sg, sgout,
+ *b_size + (enc ? 0 : authsize),
+ iv);
aead_request_set_ad(req, aad_size);
if (secs)
diff --git a/drivers/Kconfig b/drivers/Kconfig
index e1e2066..de581c1 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -202,4 +202,6 @@
source "drivers/fpga/Kconfig"
+source "drivers/tee/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 733bf0b..9a90575 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -174,3 +174,4 @@
obj-$(CONFIG_ANDROID) += android/
obj-$(CONFIG_NVMEM) += nvmem/
obj-$(CONFIG_FPGA) += fpga/
+obj-$(CONFIG_TEE) += tee/
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 3de3b6b..f43a586 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,11 +182,6 @@
void __weak arch_unregister_cpu(int cpu) {}
-int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
-{
- return -ENODEV;
-}
-
static int acpi_processor_hotadd_init(struct acpi_processor *pr)
{
unsigned long long sta;
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 691814d..943702d 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -594,25 +594,20 @@
void acpi_ns_terminate(void)
{
acpi_status status;
+ union acpi_operand_object *prev;
+ union acpi_operand_object *next;
ACPI_FUNCTION_TRACE(ns_terminate);
-#ifdef ACPI_EXEC_APP
- {
- union acpi_operand_object *prev;
- union acpi_operand_object *next;
+ /* Delete any module-level code blocks */
- /* Delete any module-level code blocks */
-
- next = acpi_gbl_module_code_list;
- while (next) {
- prev = next;
- next = next->method.mutex;
- prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
- acpi_ut_remove_reference(prev);
- }
+ next = acpi_gbl_module_code_list;
+ while (next) {
+ prev = next;
+ next = next->method.mutex;
+ prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
+ acpi_ut_remove_reference(prev);
}
-#endif
/*
* Free the entire namespace -- all nodes and all objects
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index ec4f507..4558cc7 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1020,7 +1020,7 @@
/* The record may be cleared by others, try read next record */
if (len == -ENOENT)
goto skip;
- else if (len < sizeof(*rcd)) {
+ else if (len < 0 || len < sizeof(*rcd)) {
rc = -EIO;
goto out;
}
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 56190d0..0a3ca20 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1197,7 +1197,6 @@
acpi_wakeup_device_init();
acpi_debugger_init();
acpi_setup_sb_notify_handler();
- acpi_set_processor_mapping();
return 0;
}
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index 7b2c48f..201c7ce 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -146,6 +146,10 @@
int count;
struct acpi_hardware_id *id;
+ /* Avoid unnecessarily loading modules for non present devices. */
+ if (!acpi_device_is_present(acpi_dev))
+ return 0;
+
/*
* Since we skip ACPI_DT_NAMESPACE_HID from the modalias below, 0 should
* be returned if ACPI_DT_NAMESPACE_HID is the only ACPI/PNP ID in the
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 73c9c7f..f06317d 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -99,13 +99,13 @@
return -ENODEV;
/*
- * If the device has a _HID (or _CID) returning a valid ACPI/PNP
- * device ID, it is better to make it look less attractive here, so that
- * the other device with the same _ADR value (that may not have a valid
- * device ID) can be matched going forward. [This means a second spec
- * violation in a row, so whatever we do here is best effort anyway.]
+ * If the device has a _HID returning a valid ACPI/PNP device ID, it is
+ * better to make it look less attractive here, so that the other device
+ * with the same _ADR value (that may not have a valid device ID) can be
+ * matched going forward. [This means a second spec violation in a row,
+ * so whatever we do here is best effort anyway.]
*/
- return sta_present && list_empty(&adev->pnp.ids) ?
+ return sta_present && !adev->pnp.type.platform_id ?
FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index f3bc901..b1815b2 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1390,6 +1390,11 @@
dev_name(&adev_dimm->dev));
return -ENXIO;
}
+ /*
+ * Record nfit_mem for the notification path to track back to
+ * the nfit sysfs attributes for this dimm device object.
+ */
+ dev_set_drvdata(&adev_dimm->dev, nfit_mem);
/*
* Until standardization materializes we need to consider 4
@@ -1446,9 +1451,11 @@
sysfs_put(nfit_mem->flags_attr);
nfit_mem->flags_attr = NULL;
}
- if (adev_dimm)
+ if (adev_dimm) {
acpi_remove_notify_handler(adev_dimm->handle,
ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+ dev_set_drvdata(&adev_dimm->dev, NULL);
+ }
}
mutex_unlock(&acpi_desc->init_mutex);
}
@@ -1528,6 +1535,9 @@
struct kernfs_node *nfit_kernfs;
nvdimm = nfit_mem->nvdimm;
+ if (!nvdimm)
+ continue;
+
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 5c78ee1..fd59ae8 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -280,79 +280,6 @@
}
EXPORT_SYMBOL_GPL(acpi_get_cpuid);
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static bool __init
-map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
-{
- int type, id;
- u32 acpi_id;
- acpi_status status;
- acpi_object_type acpi_type;
- unsigned long long tmp;
- union acpi_object object = { 0 };
- struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
- status = acpi_get_type(handle, &acpi_type);
- if (ACPI_FAILURE(status))
- return false;
-
- switch (acpi_type) {
- case ACPI_TYPE_PROCESSOR:
- status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
- if (ACPI_FAILURE(status))
- return false;
- acpi_id = object.processor.proc_id;
-
- /* validate the acpi_id */
- if(acpi_processor_validate_proc_id(acpi_id))
- return false;
- break;
- case ACPI_TYPE_DEVICE:
- status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
- if (ACPI_FAILURE(status))
- return false;
- acpi_id = tmp;
- break;
- default:
- return false;
- }
-
- type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-
- *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
- id = acpi_map_cpuid(*phys_id, acpi_id);
-
- if (id < 0)
- return false;
- *cpuid = id;
- return true;
-}
-
-static acpi_status __init
-set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
- void **rv)
-{
- phys_cpuid_t phys_id;
- int cpu_id;
-
- if (!map_processor(handle, &phys_id, &cpu_id))
- return AE_ERROR;
-
- acpi_map_cpu2node(handle, cpu_id, phys_id);
- return AE_OK;
-}
-
-void __init acpi_set_processor_mapping(void)
-{
- /* Set persistent cpu <-> node mapping for all processors. */
- acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, set_processor_node_mapping,
- NULL, NULL, NULL);
-}
-#else
-void __init acpi_set_processor_mapping(void) {}
-#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
u64 *phys_addr, int *ioapic_id)
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index 2fa8304..7a34310 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -275,8 +275,8 @@
device->driver_data = hc;
acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc);
- printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n",
- hc->ec, hc->offset, hc->query_bit);
+ dev_info(&device->dev, "SBS HC: offset = 0x%0x, query_bit = 0x%0x\n",
+ hc->offset, hc->query_bit);
return 0;
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1ef2f68..bb48a7b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -512,8 +512,6 @@
* (protected by @inner_lock)
* @todo: list of work for this process
* (protected by @inner_lock)
- * @wait: wait queue head to wait for proc work
- * (invariant after initialized)
* @stats: per-process binder statistics
* (atomics, no lock needed)
* @delivered_death: list of delivered death notification
@@ -554,7 +552,6 @@
bool is_dead;
struct list_head todo;
- wait_queue_head_t wait;
struct binder_stats stats;
struct list_head delivered_death;
int max_threads;
@@ -4538,6 +4535,18 @@
if (t)
spin_lock(&t->lock);
}
+
+ /*
+ * If this thread used poll, make sure we remove the waitqueue
+ * from any epoll data structures holding it with POLLFREE.
+ * waitqueue_active() is safe to use here because we're holding
+ * the inner lock.
+ */
+ if ((thread->looper & BINDER_LOOPER_STATE_POLL) &&
+ waitqueue_active(&thread->wait)) {
+ wake_up_poll(&thread->wait, POLLHUP | POLLFREE);
+ }
+
binder_inner_proc_unlock(thread->proc);
if (send_reply)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index c940382..9b46ef4 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -265,9 +265,9 @@
{ PCI_VDEVICE(INTEL, 0x3b23), board_ahci }, /* PCH AHCI */
{ PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */
{ PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */
- { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH AHCI */
+ { PCI_VDEVICE(INTEL, 0x3b29), board_ahci }, /* PCH M AHCI */
{ PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */
- { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */
+ { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH M RAID */
{ PCI_VDEVICE(INTEL, 0x3b2f), board_ahci }, /* PCH AHCI */
{ PCI_VDEVICE(INTEL, 0x19b0), board_ahci }, /* DNV AHCI */
{ PCI_VDEVICE(INTEL, 0x19b1), board_ahci }, /* DNV AHCI */
@@ -290,9 +290,9 @@
{ PCI_VDEVICE(INTEL, 0x19cE), board_ahci }, /* DNV AHCI */
{ PCI_VDEVICE(INTEL, 0x19cF), board_ahci }, /* DNV AHCI */
{ PCI_VDEVICE(INTEL, 0x1c02), board_ahci }, /* CPT AHCI */
- { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT AHCI */
+ { PCI_VDEVICE(INTEL, 0x1c03), board_ahci }, /* CPT M AHCI */
{ PCI_VDEVICE(INTEL, 0x1c04), board_ahci }, /* CPT RAID */
- { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */
+ { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT M RAID */
{ PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */
{ PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */
{ PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
@@ -301,20 +301,20 @@
{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */
{ PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
{ PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */
- { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point AHCI */
+ { PCI_VDEVICE(INTEL, 0x1e03), board_ahci }, /* Panther Point M AHCI */
{ PCI_VDEVICE(INTEL, 0x1e04), board_ahci }, /* Panther Point RAID */
{ PCI_VDEVICE(INTEL, 0x1e05), board_ahci }, /* Panther Point RAID */
{ PCI_VDEVICE(INTEL, 0x1e06), board_ahci }, /* Panther Point RAID */
- { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point RAID */
+ { PCI_VDEVICE(INTEL, 0x1e07), board_ahci }, /* Panther Point M RAID */
{ PCI_VDEVICE(INTEL, 0x1e0e), board_ahci }, /* Panther Point RAID */
{ PCI_VDEVICE(INTEL, 0x8c02), board_ahci }, /* Lynx Point AHCI */
- { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c03), board_ahci }, /* Lynx Point M AHCI */
{ PCI_VDEVICE(INTEL, 0x8c04), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c05), board_ahci }, /* Lynx Point M RAID */
{ PCI_VDEVICE(INTEL, 0x8c06), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point M RAID */
{ PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */
- { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */
+ { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point M RAID */
{ PCI_VDEVICE(INTEL, 0x9c02), board_ahci }, /* Lynx Point-LP AHCI */
{ PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */
{ PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */
@@ -355,21 +355,21 @@
{ PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */
{ PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */
{ PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */
- { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */
+ { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series M AHCI */
{ PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series M RAID */
{ PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series M RAID */
{ PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */
- { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */
+ { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series M RAID */
{ PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */
{ PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */
{ PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */
{ PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */
- { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */
+ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H M AHCI */
{ PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */
- { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H M RAID */
{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/
@@ -383,6 +383,11 @@
{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa356), board_ahci }, /* Cannon Lake PCH-H RAID */
+ { PCI_VDEVICE(INTEL, 0x0f22), board_ahci }, /* Bay Trail AHCI */
+ { PCI_VDEVICE(INTEL, 0x0f23), board_ahci }, /* Bay Trail AHCI */
+ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci }, /* Cherry Trail AHCI */
+ { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci }, /* Apollo Lake AHCI */
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 33e363d..aee3952 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4322,6 +4322,7 @@
* https://bugzilla.kernel.org/show_bug.cgi?id=121671
*/
{ "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
+ { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 },
/* Devices we expect to fail diagnostics */
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 051b615..8d22acd 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1481,7 +1481,6 @@
break;
default:
- WARN_ON_ONCE(1);
return AC_ERR_SYSTEM;
}
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 5fc81e2..e55f418 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2802,7 +2802,7 @@
return err;
out_free_irq:
- free_irq(dev->irq, dev);
+ free_irq(irq, dev);
out_free:
kfree(dev);
out_release:
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 10e1b9e..f03cf1d 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -121,6 +121,7 @@
config IMG_ASCII_LCD
tristate "Imagination Technologies ASCII LCD Display"
+ depends on HAS_IOMEM
default y if MIPS_MALTA || MIPS_SEAD3
select SYSCON
help
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
index 83f1439..6e8eaa7 100644
--- a/drivers/auxdisplay/img-ascii-lcd.c
+++ b/drivers/auxdisplay/img-ascii-lcd.c
@@ -442,3 +442,7 @@
.remove = img_ascii_lcd_remove,
};
module_platform_driver(img_ascii_lcd_driver);
+
+MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display");
+MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index d02e7c0..0651010 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -235,6 +235,9 @@
config GENERIC_CPU_AUTOPROBE
bool
+config GENERIC_CPU_VULNERABILITIES
+ bool
+
config SOC_BUS
bool
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index e9fd32e..70e13cf 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -16,6 +16,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/cacheinfo.h>
#include <linux/compiler.h>
@@ -104,9 +105,16 @@
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf, *sib_leaf;
unsigned int index;
- int ret;
+ int ret = 0;
- ret = cache_setup_of_node(cpu);
+ if (this_cpu_ci->cpu_map_populated)
+ return 0;
+
+ if (of_have_populated_dt())
+ ret = cache_setup_of_node(cpu);
+ else if (!acpi_disabled)
+ /* No cache property/hierarchy support yet in ACPI */
+ ret = -ENOTSUPP;
if (ret)
return ret;
@@ -203,8 +211,7 @@
*/
ret = cache_shared_cpu_map_setup(cpu);
if (ret) {
- pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n",
- cpu);
+ pr_warn("Unable to detect cache hierarchy for CPU %d\n", cpu);
goto free_ci;
}
return 0;
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 4c28e1a..56b6c85 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -499,10 +499,58 @@
#endif
}
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ &dev_attr_meltdown.attr,
+ &dev_attr_spectre_v1.attr,
+ &dev_attr_spectre_v2.attr,
+ NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+ .name = "vulnerabilities",
+ .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+ if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_vulnerabilities_group))
+ pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
void __init cpu_dev_init(void)
{
if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
panic("Failed to register CPU subsystem");
cpu_dev_register_generic();
+ cpu_register_vulnerabilities();
}
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index cd6ccdc..372d10a 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -39,7 +39,7 @@
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->probe)
+ if (isa_driver && isa_driver->probe)
return isa_driver->probe(dev, to_isa_dev(dev)->id);
return 0;
@@ -49,7 +49,7 @@
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->remove)
+ if (isa_driver && isa_driver->remove)
return isa_driver->remove(dev, to_isa_dev(dev)->id);
return 0;
@@ -59,7 +59,7 @@
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->shutdown)
+ if (isa_driver && isa_driver->shutdown)
isa_driver->shutdown(dev, to_isa_dev(dev)->id);
}
@@ -67,7 +67,7 @@
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->suspend)
+ if (isa_driver && isa_driver->suspend)
return isa_driver->suspend(dev, to_isa_dev(dev)->id, state);
return 0;
@@ -77,7 +77,7 @@
{
struct isa_driver *isa_driver = dev->platform_data;
- if (isa_driver->resume)
+ if (isa_driver && isa_driver->resume)
return isa_driver->resume(dev, to_isa_dev(dev)->id);
return 0;
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 6441dfd..a7c5b79 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -331,7 +331,7 @@
opp_table = _find_opp_table(dev);
if (IS_ERR(opp_table)) {
count = PTR_ERR(opp_table);
- dev_err(dev, "%s: OPP table not found (%d)\n",
+ dev_dbg(dev, "%s: OPP table not found (%d)\n",
__func__, count);
goto out_unlock;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 24d6cef..402254d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1558,9 +1558,8 @@
return err;
}
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void __lo_release(struct loop_device *lo)
{
- struct loop_device *lo = disk->private_data;
int err;
if (atomic_dec_return(&lo->lo_refcnt))
@@ -1586,6 +1585,13 @@
mutex_unlock(&lo->lo_ctl_mutex);
}
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+ mutex_lock(&loop_index_mutex);
+ __lo_release(disk->private_data);
+ mutex_unlock(&loop_index_mutex);
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
.open = lo_open,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 98b767d..4d30da2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -272,6 +272,7 @@
int result, flags;
struct nbd_request request;
unsigned long size = blk_rq_bytes(req);
+ struct bio *bio;
u32 type;
if (req->cmd_type == REQ_TYPE_DRV_PRIV)
@@ -305,16 +306,20 @@
return -EIO;
}
- if (type == NBD_CMD_WRITE) {
- struct req_iterator iter;
+ if (type != NBD_CMD_WRITE)
+ return 0;
+
+ flags = 0;
+ bio = req->bio;
+ while (bio) {
+ struct bio *next = bio->bi_next;
+ struct bvec_iter iter;
struct bio_vec bvec;
- /*
- * we are really probing at internals to determine
- * whether to set MSG_MORE or not...
- */
- rq_for_each_segment(bvec, req, iter) {
- flags = 0;
- if (!rq_iter_last(bvec, iter))
+
+ bio_for_each_segment(bvec, bio, iter) {
+ bool is_last = !next && bio_iter_last(bvec, iter);
+
+ if (is_last)
flags = MSG_MORE;
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
cmd, bvec.bv_len);
@@ -325,7 +330,16 @@
result);
return -EIO;
}
+ /*
+ * The completion might already have come in,
+ * so break for the last one instead of letting
+ * the iterator do it. This prevents use-after-free
+ * of the bio.
+ */
+ if (is_last)
+ break;
}
+ bio = next;
}
return 0;
}
@@ -654,7 +668,10 @@
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
case NBD_SET_TIMEOUT:
- nbd->tag_set.timeout = arg * HZ;
+ if (arg) {
+ nbd->tag_set.timeout = arg * HZ;
+ blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
+ }
return 0;
case NBD_SET_FLAGS:
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 90fa4ac..7e4ef05 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2779,7 +2779,7 @@
pd->pkt_dev = MKDEV(pktdev_major, idx);
ret = pkt_new_dev(pd, dev);
if (ret)
- goto out_new_dev;
+ goto out_mem2;
/* inherit events of the host device */
disk->events = pd->bdev->bd_disk->events;
@@ -2797,8 +2797,6 @@
mutex_unlock(&ctl_mutex);
return 0;
-out_new_dev:
- blk_cleanup_queue(disk->queue);
out_mem2:
put_disk(disk);
out_mem:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 24f4b54..e32badd 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4511,7 +4511,7 @@
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
q->limits.max_sectors = queue_max_hw_sectors(q);
- blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c9914d65..b7c0b69 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1286,6 +1286,8 @@
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+ zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
+ zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 1cb958e..94e914a 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -31,6 +31,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/mmc/host.h>
#include <linux/mmc/sdio_ids.h>
#include <linux/mmc/sdio_func.h>
@@ -291,6 +292,14 @@
tuple = tuple->next;
}
+ /* BCM43341 devices soldered onto the PCB (non-removable) use an
+ * uart connection for bluetooth, ignore the BT SDIO interface.
+ */
+ if (func->vendor == SDIO_VENDOR_ID_BROADCOM &&
+ func->device == SDIO_DEVICE_ID_BROADCOM_43341 &&
+ !mmc_card_is_removable(func->card->host))
+ return -ENODEV;
+
data = devm_kzalloc(&func->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 6930286..3257647 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/usb.h>
+#include <linux/usb/quirks.h>
#include <linux/firmware.h>
#include <asm/unaligned.h>
@@ -369,8 +370,8 @@
#define BTUSB_FIRMWARE_LOADED 7
#define BTUSB_FIRMWARE_FAILED 8
#define BTUSB_BOOTING 9
-#define BTUSB_RESET_RESUME 10
-#define BTUSB_DIAG_RUNNING 11
+#define BTUSB_DIAG_RUNNING 10
+#define BTUSB_OOB_WAKE_ENABLED 11
struct btusb_data {
struct hci_dev *hdev;
@@ -2928,9 +2929,9 @@
/* QCA Rome devices lose their updated firmware over suspend,
* but the USB hub doesn't notice any status change.
- * Explicitly request a device reset on resume.
+ * explicitly request a device reset on resume.
*/
- set_bit(BTUSB_RESET_RESUME, &data->flags);
+ interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
}
#ifdef CONFIG_BT_HCIBTUSB_RTL
@@ -2941,7 +2942,7 @@
* but the USB hub doesn't notice any status change.
* Explicitly request a device reset on resume.
*/
- set_bit(BTUSB_RESET_RESUME, &data->flags);
+ interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
}
#endif
@@ -3098,14 +3099,6 @@
btusb_stop_traffic(data);
usb_kill_anchored_urbs(&data->tx_anchor);
- /* Optionally request a device reset on resume, but only when
- * wakeups are disabled. If wakeups are enabled we assume the
- * device will stay powered up throughout suspend.
- */
- if (test_bit(BTUSB_RESET_RESUME, &data->flags) &&
- !device_may_wakeup(&data->udev->dev))
- data->udev->reset_resume = 1;
-
return 0;
}
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 8900823..10f5613 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -1755,14 +1755,17 @@
raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock);
mutex_init(&cci_pmu->reserve_mutex);
atomic_set(&cci_pmu->active_events, 0);
- cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus);
+ cpumask_set_cpu(get_cpu(), &cci_pmu->cpus);
ret = cci_pmu_init(cci_pmu, pdev);
- if (ret)
+ if (ret) {
+ put_cpu();
return ret;
+ }
cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
&cci_pmu->node);
+ put_cpu();
pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
return 0;
}
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index aee8346..45d7ecc 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -1271,11 +1271,16 @@
int len = snprintf(NULL, 0, "ccn_%d", ccn->dt.id);
name = devm_kzalloc(ccn->dev, len + 1, GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto error_choose_name;
+ }
snprintf(name, len + 1, "ccn_%d", ccn->dt.id);
}
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
+ .module = THIS_MODULE,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
@@ -1297,7 +1302,7 @@
}
/* Pick one CPU which we will use to collect data from CCN... */
- cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu);
+ cpumask_set_cpu(get_cpu(), &ccn->dt.cpu);
/* Also make sure that the overflow interrupt is handled by this CPU */
if (ccn->irq) {
@@ -1314,10 +1319,13 @@
cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
&ccn->dt.node);
+ put_cpu();
return 0;
error_pmu_register:
error_set_affinity:
+ put_cpu();
+error_choose_name:
ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id);
for (i = 0; i < ccn->num_xps; i++)
writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
@@ -1578,8 +1586,8 @@
static void __exit arm_ccn_exit(void)
{
- cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
platform_driver_unregister(&arm_ccn_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
}
module_init(arm_ccn_init);
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 795c9d9..2051d92 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -178,6 +178,7 @@
.match = sunxi_rsb_device_match,
.probe = sunxi_rsb_device_probe,
.remove = sunxi_rsb_device_remove,
+ .uevent = of_device_uevent_modalias,
};
static void sunxi_rsb_dev_release(struct device *dev)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index a112c01..e0a5315 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -241,6 +241,9 @@
/* The timer for this si. */
struct timer_list si_timer;
+ /* This flag is set, if the timer can be set */
+ bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
bool timer_running;
@@ -416,6 +419,8 @@
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
{
+ if (!smi_info->timer_can_start)
+ return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -435,21 +440,18 @@
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
}
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
{
unsigned char msg[2];
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
- if (start_timer)
- start_new_msg(smi_info, msg, 2);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+ start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
}
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
{
unsigned char msg[3];
@@ -458,10 +460,7 @@
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
- if (start_timer)
- start_new_msg(smi_info, msg, 3);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+ start_new_msg(smi_info, msg, 3);
smi_info->si_state = SI_CLEARING_FLAGS;
}
@@ -496,11 +495,11 @@
* Note that we cannot just use disable_irq(), since the interrupt may
* be shared.
*/
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = true;
- start_check_enables(smi_info, start_timer);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -510,7 +509,7 @@
{
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = false;
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -528,7 +527,7 @@
msg = ipmi_alloc_smi_msg();
if (!msg) {
- if (!disable_si_irq(smi_info, true))
+ if (!disable_si_irq(smi_info))
smi_info->si_state = SI_NORMAL;
} else if (enable_si_irq(smi_info)) {
ipmi_free_smi_msg(msg);
@@ -544,7 +543,7 @@
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
- start_clear_flags(smi_info, true);
+ start_clear_flags(smi_info);
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
if (smi_info->intf)
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -927,7 +926,7 @@
* disable and messages disabled.
*/
if (smi_info->supports_event_msg_buff || smi_info->irq) {
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
} else {
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
@@ -1234,6 +1233,7 @@
/* Set up the timer that drives the interface. */
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
+ new_smi->timer_can_start = true;
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
/* Try to claim any interrupts. */
@@ -3448,10 +3448,12 @@
check_set_rcv_irq(smi_info);
}
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
{
if (smi_info->thread != NULL)
kthread_stop(smi_info->thread);
+
+ smi_info->timer_can_start = false;
if (smi_info->timer_running)
del_timer_sync(&smi_info->si_timer);
}
@@ -3593,7 +3595,7 @@
* Start clearing the flags before we enable interrupts or the
* timer to avoid racing with the timer.
*/
- start_clear_flags(new_smi, false);
+ start_clear_flags(new_smi);
/*
* IRQ is defined to be set when non-zero. req_events will
@@ -3671,7 +3673,7 @@
return 0;
out_err_stop_timer:
- wait_for_timer_and_thread(new_smi);
+ stop_timer_and_thread(new_smi);
out_err:
new_smi->interrupt_disabled = true;
@@ -3865,7 +3867,7 @@
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
- wait_for_timer_and_thread(to_clean);
+ stop_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
@@ -3876,7 +3878,7 @@
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
- disable_si_irq(to_clean, false);
+ disable_si_irq(to_clean);
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index c0e8e1f..2bfaf22 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -144,7 +144,7 @@
{ HI6220_BBPPLL_SEL, "bbppll_sel", "pll0_bbp_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 9, 0, },
{ HI6220_MEDIA_PLL_SRC, "media_pll_src", "pll_media_gate", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 10, 0, },
{ HI6220_MMC2_SEL, "mmc2_sel", "mmc2_mux1", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 11, 0, },
- { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IGNORE_UNUSED, 0x270, 12, 0, },
+ { HI6220_CS_ATB_SYSPLL, "cs_atb_syspll", "syspll", CLK_SET_RATE_PARENT|CLK_IS_CRITICAL, 0x270, 12, 0, },
};
static struct hisi_mux_clock hi6220_mux_clks_sys[] __initdata = {
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index ce8ea10..93a1966 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c
@@ -487,7 +487,7 @@
clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24);
clk[IMX6QDL_CLK_GPU3D_CORE] = imx_clk_gate2("gpu3d_core", "gpu3d_core_podf", base + 0x6c, 26);
clk[IMX6QDL_CLK_HDMI_IAHB] = imx_clk_gate2("hdmi_iahb", "ahb", base + 0x70, 0);
- clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "video_27m", base + 0x70, 4);
+ clk[IMX6QDL_CLK_HDMI_ISFR] = imx_clk_gate2("hdmi_isfr", "mipi_core_cfg", base + 0x70, 4);
clk[IMX6QDL_CLK_I2C1] = imx_clk_gate2("i2c1", "ipg_per", base + 0x70, 6);
clk[IMX6QDL_CLK_I2C2] = imx_clk_gate2("i2c2", "ipg_per", base + 0x70, 8);
clk[IMX6QDL_CLK_I2C3] = imx_clk_gate2("i2c3", "ipg_per", base + 0x70, 10);
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 9f24fcf..e425e50 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -185,6 +185,7 @@
uint32_t pcw_reg;
int pcw_shift;
const struct mtk_pll_div_table *div_table;
+ const char *parent_name;
};
void mtk_clk_register_plls(struct device_node *node,
diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c
index 0c2deac..1502384 100644
--- a/drivers/clk/mediatek/clk-pll.c
+++ b/drivers/clk/mediatek/clk-pll.c
@@ -302,7 +302,10 @@
init.name = data->name;
init.ops = &mtk_pll_ops;
- init.parent_names = &parent_name;
+ if (data->parent_name)
+ init.parent_names = &data->parent_name;
+ else
+ init.parent_names = &parent_name;
init.num_parents = 1;
clk = clk_register(NULL, &pll->hw);
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
index df97e25..9fe0939 100644
--- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -608,7 +608,7 @@
0x150, 0, 4, 24, 2, BIT(31),
CLK_SET_RATE_PARENT);
-static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
+static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0);
static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c
index 6041bdb..f69f9e8 100644
--- a/drivers/clk/sunxi/clk-sun9i-mmc.c
+++ b/drivers/clk/sunxi/clk-sun9i-mmc.c
@@ -16,6 +16,7 @@
#include <linux/clk.h>
#include <linux/clk-provider.h>
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -83,9 +84,20 @@
return 0;
}
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ sun9i_mmc_reset_assert(rcdev, id);
+ udelay(10);
+ sun9i_mmc_reset_deassert(rcdev, id);
+
+ return 0;
+}
+
static const struct reset_control_ops sun9i_mmc_reset_ops = {
.assert = sun9i_mmc_reset_assert,
.deassert = sun9i_mmc_reset_deassert,
+ .reset = sun9i_mmc_reset_reset,
};
static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index 8e2db5e..af520d8 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -963,7 +963,7 @@
* U71 divider of cclk_lp.
*/
clk = tegra_clk_register_divider("pll_p_out3_cclklp", "pll_p_out3",
- clk_base + SUPER_CCLKG_DIVIDER, 0,
+ clk_base + SUPER_CCLKLP_DIVIDER, 0,
TEGRA_DIVIDER_INT, 16, 8, 1, NULL);
clk_register_clkdev(clk, "pll_p_out3_cclklp", NULL);
diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c
index 5d029991..481225a 100644
--- a/drivers/clk/uniphier/clk-uniphier-sys.c
+++ b/drivers/clk/uniphier/clk-uniphier-sys.c
@@ -98,7 +98,7 @@
const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = {
UNIPHIER_CLK_FACTOR("spll", -1, "ref", 120, 1), /* 2400 MHz */
UNIPHIER_CLK_FACTOR("dapll1", -1, "ref", 128, 1), /* 2560 MHz */
- UNIPHIER_CLK_FACTOR("dapll2", -1, "ref", 144, 125), /* 2949.12 MHz */
+ UNIPHIER_CLK_FACTOR("dapll2", -1, "dapll1", 144, 125), /* 2949.12 MHz */
UNIPHIER_CLK_FACTOR("uart", 0, "dapll2", 1, 40),
UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48),
UNIPHIER_PRO5_SYS_CLK_SD,
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e2c6e43..0bed22f 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -305,6 +305,14 @@
This must be disabled for hardware validation purposes to detect any
hardware anomalies of missing events.
+config ARM_ARCH_TIMER_VCT_ACCESS
+ bool "Support for ARM architected timer virtual counter access in userspace"
+ default !ARM64
+ depends on ARM_ARCH_TIMER
+ help
+ This option enables support for reading the ARM architected timer's
+ virtual counter in userspace.
+
config FSL_ERRATUM_A008585
bool "Workaround for Freescale/NXP Erratum A-008585"
default y
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index a2503db..e3bc592 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -449,7 +449,10 @@
| ARCH_TIMER_USR_PCT_ACCESS_EN);
/* Enable user access to the virtual counter */
- cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+ if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_VCT_ACCESS))
+ cntkctl |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+ else
+ cntkctl &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
arch_timer_set_cntkctl(cntkctl);
}
diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c
index 1b2574c..b167cc6 100644
--- a/drivers/clocksource/timer-stm32.c
+++ b/drivers/clocksource/timer-stm32.c
@@ -16,6 +16,7 @@
#include <linux/of_irq.h>
#include <linux/clk.h>
#include <linux/reset.h>
+#include <linux/slab.h>
#define TIM_CR1 0x00
#define TIM_DIER 0x0c
@@ -106,6 +107,10 @@
unsigned long rate, max_delta;
int irq, ret, bits, prescaler = 1;
+ data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
clk = of_clk_get(np, 0);
if (IS_ERR(clk)) {
ret = PTR_ERR(clk);
@@ -156,8 +161,8 @@
writel_relaxed(prescaler - 1, data->base + TIM_PSC);
writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR);
- writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER);
writel_relaxed(0, data->base + TIM_SR);
+ writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER);
data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ);
@@ -184,6 +189,7 @@
err_clk_enable:
clk_put(clk);
err_clk_get:
+ kfree(data);
return ret;
}
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index e8fe5b9..1170bfe 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -303,6 +303,7 @@
if MIPS
config LOONGSON2_CPUFREQ
tristate "Loongson2 CPUFreq Driver"
+ depends on LEMOTE_MACH2F
help
This option adds a CPUFreq driver for loongson processors which
support software configurable cpu frequency.
@@ -315,6 +316,7 @@
config LOONGSON1_CPUFREQ
tristate "Loongson1 CPUFreq Driver"
+ depends on LOONGSON1_LS1B
help
This option adds a CPUFreq driver for loongson1 processors which
support software configurable cpu frequency.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 837a281..434e729 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1022,11 +1022,19 @@
.release = cpufreq_sysfs_release,
};
-static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
- struct device *dev)
+static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
{
+ struct device *dev = get_cpu_device(cpu);
+
+ if (!dev)
+ return;
+
+ if (cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+ return;
+
dev_dbg(dev, "%s: Adding symlink\n", __func__);
- return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
+ if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"))
+ dev_err(dev, "cpufreq symlink creation failed\n");
}
static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
@@ -1288,10 +1296,10 @@
policy->user_policy.min = policy->min;
policy->user_policy.max = policy->max;
- write_lock_irqsave(&cpufreq_driver_lock, flags);
- for_each_cpu(j, policy->related_cpus)
+ for_each_cpu(j, policy->related_cpus) {
per_cpu(cpufreq_cpu_data, j) = policy;
- write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ add_cpu_dev_symlink(policy, j);
+ }
} else {
policy->min = policy->user_policy.min;
policy->max = policy->user_policy.max;
@@ -1388,13 +1396,15 @@
if (cpufreq_driver->exit)
cpufreq_driver->exit(policy);
+
+ for_each_cpu(j, policy->real_cpus)
+ remove_cpu_dev_symlink(policy, get_cpu_device(j));
+
out_free_policy:
cpufreq_policy_free(policy, !new_policy);
return ret;
}
-static int cpufreq_offline(unsigned int cpu);
-
/**
* cpufreq_add_dev - the cpufreq interface for a CPU device.
* @dev: CPU device.
@@ -1416,16 +1426,10 @@
/* Create sysfs link on CPU registration */
policy = per_cpu(cpufreq_cpu_data, cpu);
- if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
- return 0;
+ if (policy)
+ add_cpu_dev_symlink(policy, cpu);
- ret = add_cpu_dev_symlink(policy, dev);
- if (ret) {
- cpumask_clear_cpu(cpu, policy->real_cpus);
- cpufreq_offline(cpu);
- }
-
- return ret;
+ return 0;
}
static int cpufreq_offline(unsigned int cpu)
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 7fe442c..854a567 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -164,6 +164,24 @@
drv->state_count += 1;
}
+ /*
+ * On the PowerNV platform cpu_present may be less than cpu_possible in
+ * cases when firmware detects the CPU, but it is not available to the
+ * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
+ * run time and hence cpu_devices are not created for those CPUs by the
+ * generic topology_init().
+ *
+ * drv->cpumask defaults to cpu_possible_mask in
+ * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where
+ * cpu_devices are not created for CPUs in cpu_possible_mask that
+ * cannot be hot-added later at run time.
+ *
+ * Trying cpuidle_register_device() on a CPU without a cpu_device is
+ * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
+ */
+
+ drv->cpumask = (struct cpumask *)cpu_present_mask;
+
return 0;
}
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 78ab946..439f460 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -189,6 +189,7 @@
return -EBUSY;
}
target_state = &drv->states[index];
+ broadcast = false;
}
/* Take note of the planned idle state. */
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 832a2c3..9e98a5f 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -613,6 +613,18 @@
struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
int error;
+ /*
+ * Return if cpu_device is not setup for this CPU.
+ *
+ * This could happen if the arch did not set up cpu_device
+ * since this CPU is not in cpu_present mask and the
+ * driver did not send a correct CPU mask during registration.
+ * Without this check we would end up passing bogus
+ * value for &cpu_dev->kobj in kobject_init_and_add()
+ */
+ if (!cpu_dev)
+ return -ENODEV;
+
kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
if (!kdev)
return -ENOMEM;
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index ecfdcfe..4f41d6d 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -34,12 +34,12 @@
#define PPC405EX_CE_RESET 0x00000008
#define CRYPTO4XX_CRYPTO_PRIORITY 300
-#define PPC4XX_LAST_PD 63
-#define PPC4XX_NUM_PD 64
-#define PPC4XX_LAST_GD 1023
+#define PPC4XX_NUM_PD 256
+#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1)
#define PPC4XX_NUM_GD 1024
-#define PPC4XX_LAST_SD 63
-#define PPC4XX_NUM_SD 64
+#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1)
+#define PPC4XX_NUM_SD 256
+#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1)
#define PPC4XX_SD_BUFFER_SIZE 2048
#define PD_ENTRY_INUSE 1
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 98468b9..2ca101a 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -228,12 +228,16 @@
* without any error (HW optimizations for later
* CAAM eras), then try again.
*/
- rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
- if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
- !(rdsta_val & (1 << sh_idx)))
- ret = -EAGAIN;
if (ret)
break;
+
+ rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
+ if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
+ !(rdsta_val & (1 << sh_idx))) {
+ ret = -EAGAIN;
+ break;
+ }
+
dev_info(ctrldev, "Instantiated RNG4 SH%d\n", sh_idx);
/* Clear the contents before recreating the descriptor */
memset(desc, 0x00, CAAM_CMD_SZ * 7);
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 5d4c050..e2bcacc 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -41,6 +41,7 @@
struct device *dev;
int ridx;
struct caam_job_ring __iomem *rregs; /* JobR's register space */
+ struct tasklet_struct irqtask;
int irq; /* One per queue */
/* Number of scatterlist crypt transforms active on the JobR */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 757c27f..9e7f281 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -73,6 +73,8 @@
ret = caam_reset_hw_jr(dev);
+ tasklet_kill(&jrp->irqtask);
+
/* Release interrupt */
free_irq(jrp->irq, dev);
@@ -128,7 +130,7 @@
/*
* Check the output ring for ready responses, kick
- * the threaded irq if jobs done.
+ * tasklet if jobs done.
*/
irqstate = rd_reg32(&jrp->rregs->jrintstatus);
if (!irqstate)
@@ -150,13 +152,18 @@
/* Have valid interrupt at this point, just ACK and trigger */
wr_reg32(&jrp->rregs->jrintstatus, irqstate);
- return IRQ_WAKE_THREAD;
+ preempt_disable();
+ tasklet_schedule(&jrp->irqtask);
+ preempt_enable();
+
+ return IRQ_HANDLED;
}
-static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void caam_jr_dequeue(unsigned long devarg)
{
int hw_idx, sw_idx, i, head, tail;
- struct device *dev = st_dev;
+ struct device *dev = (struct device *)devarg;
struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
u32 *userdesc, userstatus;
@@ -230,8 +237,6 @@
/* reenable / unmask IRQs */
clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
-
- return IRQ_HANDLED;
}
/**
@@ -389,10 +394,11 @@
jrp = dev_get_drvdata(dev);
+ tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
+
/* Connect job ring interrupt handler. */
- error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
- caam_jr_threadirq, IRQF_SHARED,
- dev_name(dev), dev);
+ error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
+ dev_name(dev), dev);
if (error) {
dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
jrp->ridx, jrp->irq);
@@ -454,6 +460,7 @@
out_free_irq:
free_irq(jrp->irq, dev);
out_kill_deq:
+ tasklet_kill(&jrp->irqtask);
return error;
}
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index c5aac25..b365ad7 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1620,6 +1620,7 @@
CWQ_ENTRY_SIZE, 0, NULL);
if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
return -ENOMEM;
}
return 0;
@@ -1629,6 +1630,8 @@
{
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+ queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+ queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
}
static int spu_queue_register(struct spu_queue *p, unsigned long q_type)
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index dce1af0..a668286 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -805,8 +805,9 @@
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
}
- err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
- IRQF_SHARED, pdev->name, pdev);
+ err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+ s5p_aes_interrupt, IRQF_ONESHOT,
+ pdev->name, pdev);
if (err < 0) {
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index e2d323f..1c8d79d 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1232,12 +1232,11 @@
sg_link_tbl_len += authsize;
}
- sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc,
- &desc->ptr[4], sg_count, areq->assoclen,
- tbl_off);
+ ret = talitos_sg_map(dev, areq->src, sg_link_tbl_len, edesc,
+ &desc->ptr[4], sg_count, areq->assoclen, tbl_off);
- if (sg_count > 1) {
- tbl_off += sg_count;
+ if (ret > 1) {
+ tbl_off += ret;
sync_needed = true;
}
@@ -1248,14 +1247,15 @@
dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE);
}
- sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc,
- &desc->ptr[5], sg_count, areq->assoclen,
- tbl_off);
+ ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
+ sg_count, areq->assoclen, tbl_off);
if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)
to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1);
- if (sg_count > 1) {
+ /* ICV data */
+ if (ret > 1) {
+ tbl_off += ret;
edesc->icv_ool = true;
sync_needed = true;
@@ -1265,9 +1265,7 @@
sizeof(struct talitos_ptr) + authsize;
/* Add an entry to the link table for ICV data */
- tbl_ptr += sg_count - 1;
- to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1);
- tbl_ptr++;
+ to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1);
to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN,
is_sec1);
to_talitos_ptr_len(tbl_ptr, authsize, is_sec1);
@@ -1275,18 +1273,33 @@
/* icv data follows link tables */
to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset,
is_sec1);
+ } else {
+ dma_addr_t addr = edesc->dma_link_tbl;
+
+ if (is_sec1)
+ addr += areq->assoclen + cryptlen;
+ else
+ addr += sizeof(struct talitos_ptr) * tbl_off;
+
+ to_talitos_ptr(&desc->ptr[6], addr, is_sec1);
+ to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1);
+ }
+ } else if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) {
+ ret = talitos_sg_map(dev, areq->dst, authsize, edesc,
+ &desc->ptr[6], sg_count, areq->assoclen +
+ cryptlen,
+ tbl_off);
+ if (ret > 1) {
+ tbl_off += ret;
+ edesc->icv_ool = true;
+ sync_needed = true;
+ } else {
+ edesc->icv_ool = false;
}
} else {
edesc->icv_ool = false;
}
- /* ICV data */
- if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) {
- to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1);
- to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl +
- areq->assoclen + cryptlen, is_sec1);
- }
-
/* iv out */
if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)
map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,
@@ -1494,12 +1507,20 @@
const u8 *key, unsigned int keylen)
{
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ u32 tmp[DES_EXPKEY_WORDS];
if (keylen > TALITOS_MAX_KEY_SIZE) {
crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
+ if (unlikely(crypto_ablkcipher_get_flags(cipher) &
+ CRYPTO_TFM_REQ_WEAK_KEY) &&
+ !des_ekey(tmp, key)) {
+ crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY);
+ return -EINVAL;
+ }
+
memcpy(&ctx->key, key, keylen);
ctx->keylen = keylen;
@@ -2614,7 +2635,7 @@
.ivsize = AES_BLOCK_SIZE,
}
},
- .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+ .desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CTR,
},
@@ -3047,6 +3068,11 @@
t_alg->algt.alg.aead.setkey = aead_setkey;
t_alg->algt.alg.aead.encrypt = aead_encrypt;
t_alg->algt.alg.aead.decrypt = aead_decrypt;
+ if (!(priv->features & TALITOS_FTR_SHA224_HWINIT) &&
+ !strncmp(alg->cra_name, "authenc(hmac(sha224)", 20)) {
+ kfree(t_alg);
+ return ERR_PTR(-ENOTSUPP);
+ }
break;
case CRYPTO_ALG_TYPE_AHASH:
alg = &t_alg->algt.alg.hash.halg.base;
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 094548b..883b3be 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -280,6 +280,31 @@
EXPORT_SYMBOL(fence_add_callback);
/**
+ * fence_get_status - returns the status upon completion
+ * @fence: [in] the fence to query
+ *
+ * This wraps fence_get_status_locked() to return the error status
+ * condition on a signaled fence. See fence_get_status_locked() for more
+ * details.
+ *
+ * Returns 0 if the fence has not yet been signaled, 1 if the fence has
+ * been signaled without an error condition, or a negative error code
+ * if the fence has been completed in err.
+ */
+int fence_get_status(struct fence *fence)
+{
+ unsigned long flags;
+ int status;
+
+ spin_lock_irqsave(fence->lock, flags);
+ status = fence_get_status_locked(fence);
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ return status;
+}
+EXPORT_SYMBOL(fence_get_status);
+
+/**
* fence_remove_callback - remove a callback from the signaling list
* @fence: [in] the fence to wait on
* @cb: [in] the callback to remove
@@ -529,6 +554,7 @@
fence->context = context;
fence->seqno = seqno;
fence->flags = 0UL;
+ fence->error = 0;
trace_fence_init(fence);
}
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 0cb8d9d..9dc86d3 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -96,9 +96,9 @@
obj->context = fence_context_alloc(1);
strlcpy(obj->name, name, sizeof(obj->name));
- INIT_LIST_HEAD(&obj->child_list_head);
- INIT_LIST_HEAD(&obj->active_list_head);
- spin_lock_init(&obj->child_list_lock);
+ obj->pt_tree = RB_ROOT;
+ INIT_LIST_HEAD(&obj->pt_list);
+ spin_lock_init(&obj->lock);
sync_timeline_debug_add(obj);
@@ -125,68 +125,6 @@
kref_put(&obj->kref, sync_timeline_free);
}
-/**
- * sync_timeline_signal() - signal a status change on a sync_timeline
- * @obj: sync_timeline to signal
- * @inc: num to increment on timeline->value
- *
- * A sync implementation should call this any time one of it's fences
- * has signaled or has an error condition.
- */
-static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
-{
- unsigned long flags;
- struct sync_pt *pt, *next;
-
- trace_sync_timeline(obj);
-
- spin_lock_irqsave(&obj->child_list_lock, flags);
-
- obj->value += inc;
-
- list_for_each_entry_safe(pt, next, &obj->active_list_head,
- active_list) {
- if (fence_is_signaled_locked(&pt->base))
- list_del_init(&pt->active_list);
- }
-
- spin_unlock_irqrestore(&obj->child_list_lock, flags);
-}
-
-/**
- * sync_pt_create() - creates a sync pt
- * @parent: fence's parent sync_timeline
- * @size: size to allocate for this pt
- * @inc: value of the fence
- *
- * Creates a new sync_pt as a child of @parent. @size bytes will be
- * allocated allowing for implementation specific data to be kept after
- * the generic sync_timeline struct. Returns the sync_pt object or
- * NULL in case of error.
- */
-static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size,
- unsigned int value)
-{
- unsigned long flags;
- struct sync_pt *pt;
-
- if (size < sizeof(*pt))
- return NULL;
-
- pt = kzalloc(size, GFP_KERNEL);
- if (!pt)
- return NULL;
-
- spin_lock_irqsave(&obj->child_list_lock, flags);
- sync_timeline_get(obj);
- fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock,
- obj->context, value);
- list_add_tail(&pt->child_list, &obj->child_list_head);
- INIT_LIST_HEAD(&pt->active_list);
- spin_unlock_irqrestore(&obj->child_list_lock, flags);
- return pt;
-}
-
static const char *timeline_fence_get_driver_name(struct fence *fence)
{
return "sw_sync";
@@ -203,13 +141,17 @@
{
struct sync_pt *pt = fence_to_sync_pt(fence);
struct sync_timeline *parent = fence_parent(fence);
- unsigned long flags;
- spin_lock_irqsave(fence->lock, flags);
- list_del(&pt->child_list);
- if (!list_empty(&pt->active_list))
- list_del(&pt->active_list);
- spin_unlock_irqrestore(fence->lock, flags);
+ if (!list_empty(&pt->link)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (!list_empty(&pt->link)) {
+ list_del(&pt->link);
+ rb_erase(&pt->node, &parent->pt_tree);
+ }
+ spin_unlock_irqrestore(fence->lock, flags);
+ }
sync_timeline_put(parent);
fence_free(fence);
@@ -219,18 +161,11 @@
{
struct sync_timeline *parent = fence_parent(fence);
- return (fence->seqno > parent->value) ? false : true;
+ return !__fence_is_later(fence->seqno, parent->value);
}
static bool timeline_fence_enable_signaling(struct fence *fence)
{
- struct sync_pt *pt = fence_to_sync_pt(fence);
- struct sync_timeline *parent = fence_parent(fence);
-
- if (timeline_fence_signaled(fence))
- return false;
-
- list_add_tail(&pt->active_list, &parent->active_list_head);
return true;
}
@@ -238,7 +173,7 @@
{
struct sync_pt *pt = container_of(fence, struct sync_pt, base);
- list_del_init(&pt->active_list);
+ list_del_init(&pt->link);
}
static void timeline_fence_value_str(struct fence *fence,
@@ -267,6 +202,107 @@
.timeline_value_str = timeline_fence_timeline_value_str,
};
+/**
+ * sync_timeline_signal() - signal a status change on a sync_timeline
+ * @obj: sync_timeline to signal
+ * @inc: num to increment on timeline->value
+ *
+ * A sync implementation should call this any time one of it's fences
+ * has signaled or has an error condition.
+ */
+static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
+{
+ struct sync_pt *pt, *next;
+
+ trace_sync_timeline(obj);
+
+ spin_lock_irq(&obj->lock);
+
+ obj->value += inc;
+
+ list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
+ if (!timeline_fence_signaled(&pt->base))
+ break;
+
+ list_del_init(&pt->link);
+ rb_erase(&pt->node, &obj->pt_tree);
+
+ /*
+ * A signal callback may release the last reference to this
+ * fence, causing it to be freed. That operation has to be
+ * last to avoid a use after free inside this loop, and must
+ * be after we remove the fence from the timeline in order to
+ * prevent deadlocking on timeline->lock inside
+ * timeline_fence_release().
+ */
+ fence_signal_locked(&pt->base);
+ }
+
+ spin_unlock_irq(&obj->lock);
+}
+
+/**
+ * sync_pt_create() - creates a sync pt
+ * @parent: fence's parent sync_timeline
+ * @inc: value of the fence
+ *
+ * Creates a new sync_pt as a child of @parent. @size bytes will be
+ * allocated allowing for implementation specific data to be kept after
+ * the generic sync_timeline struct. Returns the sync_pt object or
+ * NULL in case of error.
+ */
+static struct sync_pt *sync_pt_create(struct sync_timeline *obj,
+ unsigned int value)
+{
+ struct sync_pt *pt;
+
+ pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+ if (!pt)
+ return NULL;
+
+ sync_timeline_get(obj);
+ fence_init(&pt->base, &timeline_fence_ops, &obj->lock,
+ obj->context, value);
+ INIT_LIST_HEAD(&pt->link);
+
+ spin_lock_irq(&obj->lock);
+ if (!fence_is_signaled_locked(&pt->base)) {
+ struct rb_node **p = &obj->pt_tree.rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p) {
+ struct sync_pt *other;
+ int cmp;
+
+ parent = *p;
+ other = rb_entry(parent, typeof(*pt), node);
+ cmp = value - other->base.seqno;
+ if (cmp > 0) {
+ p = &parent->rb_right;
+ } else if (cmp < 0) {
+ p = &parent->rb_left;
+ } else {
+ if (fence_get_rcu(&other->base)) {
+ fence_put(&pt->base);
+ pt = other;
+ goto unlock;
+ }
+ p = &parent->rb_left;
+ }
+ }
+ rb_link_node(&pt->node, parent, p);
+ rb_insert_color(&pt->node, &obj->pt_tree);
+
+ parent = rb_next(&pt->node);
+ list_add_tail(&pt->link,
+ parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list);
+ }
+unlock:
+ spin_unlock_irq(&obj->lock);
+
+ return pt;
+}
+
/*
* *WARNING*
*
@@ -293,8 +329,16 @@
static int sw_sync_debugfs_release(struct inode *inode, struct file *file)
{
struct sync_timeline *obj = file->private_data;
+ struct sync_pt *pt, *next;
- smp_wmb();
+ spin_lock_irq(&obj->lock);
+
+ list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
+ fence_set_error(&pt->base, -ENOENT);
+ fence_signal_locked(&pt->base);
+ }
+
+ spin_unlock_irq(&obj->lock);
sync_timeline_put(obj);
return 0;
@@ -317,7 +361,7 @@
goto err;
}
- pt = sync_pt_create(obj, sizeof(*pt), data.value);
+ pt = sync_pt_create(obj, data.value);
if (!pt) {
err = -ENOMEM;
goto err;
@@ -353,6 +397,11 @@
if (copy_from_user(&value, (void __user *)arg, sizeof(value)))
return -EFAULT;
+ while (value > INT_MAX) {
+ sync_timeline_signal(obj, INT_MAX);
+ value -= INT_MAX;
+ }
+
sync_timeline_signal(obj, value);
return 0;
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index 2dd4c3d..858263d 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -62,29 +62,29 @@
static const char *sync_status_str(int status)
{
- if (status == 0)
- return "signaled";
+ if (status < 0)
+ return "error";
if (status > 0)
- return "active";
+ return "signaled";
- return "error";
+ return "active";
}
-static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show)
+static void sync_print_fence(struct seq_file *s,
+ struct fence *fence, bool show)
{
- int status = 1;
struct sync_timeline *parent = fence_parent(fence);
+ int status;
- if (fence_is_signaled_locked(fence))
- status = fence->status;
+ status = fence_get_status_locked(fence);
seq_printf(s, " %s%sfence %s",
show ? parent->name : "",
show ? "_" : "",
sync_status_str(status));
- if (status <= 0) {
+ if (status) {
struct timespec64 ts64 =
ktime_to_timespec64(fence->timestamp);
@@ -116,17 +116,15 @@
static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
{
struct list_head *pos;
- unsigned long flags;
seq_printf(s, "%s: %d\n", obj->name, obj->value);
- spin_lock_irqsave(&obj->child_list_lock, flags);
- list_for_each(pos, &obj->child_list_head) {
- struct sync_pt *pt =
- container_of(pos, struct sync_pt, child_list);
+ spin_lock_irq(&obj->lock);
+ list_for_each(pos, &obj->pt_list) {
+ struct sync_pt *pt = container_of(pos, struct sync_pt, link);
sync_print_fence(s, &pt->base, false);
}
- spin_unlock_irqrestore(&obj->child_list_lock, flags);
+ spin_unlock_irq(&obj->lock);
}
static void sync_print_sync_file(struct seq_file *s,
@@ -135,7 +133,7 @@
int i;
seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name,
- sync_status_str(!fence_is_signaled(sync_file->fence)));
+ sync_status_str(fence_get_status(sync_file->fence)));
if (fence_is_array(sync_file->fence)) {
struct fence_array *array = to_fence_array(sync_file->fence);
@@ -149,12 +147,11 @@
static int sync_debugfs_show(struct seq_file *s, void *unused)
{
- unsigned long flags;
struct list_head *pos;
seq_puts(s, "objs:\n--------------\n");
- spin_lock_irqsave(&sync_timeline_list_lock, flags);
+ spin_lock_irq(&sync_timeline_list_lock);
list_for_each(pos, &sync_timeline_list_head) {
struct sync_timeline *obj =
container_of(pos, struct sync_timeline,
@@ -163,11 +160,11 @@
sync_print_obj(s, obj);
seq_puts(s, "\n");
}
- spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
+ spin_unlock_irq(&sync_timeline_list_lock);
seq_puts(s, "fences:\n--------------\n");
- spin_lock_irqsave(&sync_file_list_lock, flags);
+ spin_lock_irq(&sync_file_list_lock);
list_for_each(pos, &sync_file_list_head) {
struct sync_file *sync_file =
container_of(pos, struct sync_file, sync_file_list);
@@ -175,7 +172,7 @@
sync_print_sync_file(s, sync_file);
seq_puts(s, "\n");
}
- spin_unlock_irqrestore(&sync_file_list_lock, flags);
+ spin_unlock_irq(&sync_file_list_lock);
return 0;
}
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index d269aa6..9615dc0 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -14,6 +14,7 @@
#define _LINUX_SYNC_H
#include <linux/list.h>
+#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/fence.h>
@@ -24,43 +25,41 @@
* struct sync_timeline - sync object
* @kref: reference count on fence.
* @name: name of the sync_timeline. Useful for debugging
- * @child_list_head: list of children sync_pts for this sync_timeline
- * @child_list_lock: lock protecting @child_list_head and fence.status
- * @active_list_head: list of active (unsignaled/errored) sync_pts
+ * @lock: lock protecting @pt_list and @value
+ * @pt_tree: rbtree of active (unsignaled/errored) sync_pts
+ * @pt_list: list of active (unsignaled/errored) sync_pts
* @sync_timeline_list: membership in global sync_timeline_list
*/
struct sync_timeline {
struct kref kref;
char name[32];
- /* protected by child_list_lock */
+ /* protected by lock */
u64 context;
int value;
- struct list_head child_list_head;
- spinlock_t child_list_lock;
-
- struct list_head active_list_head;
+ struct rb_root pt_tree;
+ struct list_head pt_list;
+ spinlock_t lock;
struct list_head sync_timeline_list;
};
static inline struct sync_timeline *fence_parent(struct fence *fence)
{
- return container_of(fence->lock, struct sync_timeline,
- child_list_lock);
+ return container_of(fence->lock, struct sync_timeline, lock);
}
/**
* struct sync_pt - sync_pt object
* @base: base fence object
- * @child_list: sync timeline child's list
- * @active_list: sync timeline active child's list
+ * @link: link on the sync timeline's list
+ * @node: node in the sync timeline's tree
*/
struct sync_pt {
struct fence base;
- struct list_head child_list;
- struct list_head active_list;
+ struct list_head link;
+ struct rb_node node;
};
#ifdef CONFIG_SW_SYNC
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 1beebf8..c835f62 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -370,10 +370,8 @@
sizeof(info->obj_name));
strlcpy(info->driver_name, fence->ops->get_driver_name(fence),
sizeof(info->driver_name));
- if (fence_is_signaled(fence))
- info->status = fence->status >= 0 ? 1 : fence->status;
- else
- info->status = 0;
+
+ info->status = fence_get_status(fence);
info->timestamp_ns = ktime_to_ns(fence->timestamp);
}
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 6b53526..3db94e8 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1107,12 +1107,14 @@
switch (order) {
case 0 ... 1:
return &unmap_pool[0];
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
case 2 ... 4:
return &unmap_pool[1];
case 5 ... 7:
return &unmap_pool[2];
case 8:
return &unmap_pool[3];
+#endif
default:
BUG();
return NULL;
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index fbb7551..ebe72a4 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -158,6 +158,12 @@
#define PATTERN_OVERWRITE 0x20
#define PATTERN_COUNT_MASK 0x1f
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+ bool done;
+ wait_queue_head_t *wait;
+};
+
struct dmatest_thread {
struct list_head node;
struct dmatest_info *info;
@@ -166,6 +172,8 @@
u8 **srcs;
u8 **dsts;
enum dma_transaction_type type;
+ wait_queue_head_t done_wait;
+ struct dmatest_done test_done;
bool done;
};
@@ -326,18 +334,25 @@
return error_count;
}
-/* poor man's completion - we want to use wait_event_freezable() on it */
-struct dmatest_done {
- bool done;
- wait_queue_head_t *wait;
-};
static void dmatest_callback(void *arg)
{
struct dmatest_done *done = arg;
-
- done->done = true;
- wake_up_all(done->wait);
+ struct dmatest_thread *thread =
+ container_of(done, struct dmatest_thread, test_done);
+ if (!thread->done) {
+ done->done = true;
+ wake_up_all(done->wait);
+ } else {
+ /*
+ * If thread->done, it means that this callback occurred
+ * after the parent thread has cleaned up. This can
+ * happen in the case that driver doesn't implement
+ * the terminate_all() functionality and a dma operation
+ * did not occur within the timeout period
+ */
+ WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+ }
}
static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -408,9 +423,8 @@
*/
static int dmatest_func(void *data)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
struct dmatest_thread *thread = data;
- struct dmatest_done done = { .wait = &done_wait };
+ struct dmatest_done *done = &thread->test_done;
struct dmatest_info *info;
struct dmatest_params *params;
struct dma_chan *chan;
@@ -637,9 +651,9 @@
continue;
}
- done.done = false;
+ done->done = false;
tx->callback = dmatest_callback;
- tx->callback_param = &done;
+ tx->callback_param = done;
cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) {
@@ -652,21 +666,12 @@
}
dma_async_issue_pending(chan);
- wait_event_freezable_timeout(done_wait, done.done,
+ wait_event_freezable_timeout(thread->done_wait, done->done,
msecs_to_jiffies(params->timeout));
status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
- if (!done.done) {
- /*
- * We're leaving the timed out dma operation with
- * dangling pointer to done_wait. To make this
- * correct, we'll need to allocate wait_done for
- * each test iteration and perform "who's gonna
- * free it this time?" dancing. For now, just
- * leave it dangling.
- */
- WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
+ if (!done->done) {
dmaengine_unmap_put(um);
result("test timed out", total_tests, src_off, dst_off,
len, 0);
@@ -747,7 +752,7 @@
dmatest_KBs(runtime, total_len), ret);
/* terminate all transfers on specified channels */
- if (ret)
+ if (ret || failed_tests)
dmaengine_terminate_all(chan);
thread->done = true;
@@ -807,6 +812,8 @@
thread->info = info;
thread->chan = dtc->chan;
thread->type = type;
+ thread->test_done.wait = &thread->done_wait;
+ init_waitqueue_head(&thread->done_wait);
smp_wmb();
thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
dma_chan_name(chan), op, i);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 9f3dbc8..fb2e747 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1694,7 +1694,6 @@
static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
{
struct pl330_thread *thrd = NULL;
- unsigned long flags;
int chans, i;
if (pl330->state == DYING)
@@ -1702,8 +1701,6 @@
chans = pl330->pcfg.num_chan;
- spin_lock_irqsave(&pl330->lock, flags);
-
for (i = 0; i < chans; i++) {
thrd = &pl330->channels[i];
if ((thrd->free) && (!_manager_ns(thrd) ||
@@ -1721,8 +1718,6 @@
thrd = NULL;
}
- spin_unlock_irqrestore(&pl330->lock, flags);
-
return thrd;
}
@@ -1740,7 +1735,6 @@
static void pl330_release_channel(struct pl330_thread *thrd)
{
struct pl330_dmac *pl330;
- unsigned long flags;
if (!thrd || thrd->free)
return;
@@ -1752,10 +1746,8 @@
pl330 = thrd->dmac;
- spin_lock_irqsave(&pl330->lock, flags);
_free_event(thrd, thrd->ev);
thrd->free = true;
- spin_unlock_irqrestore(&pl330->lock, flags);
}
/* Initialize the structure for PL330 configuration, that can be used
@@ -2120,20 +2112,20 @@
struct pl330_dmac *pl330 = pch->dmac;
unsigned long flags;
- spin_lock_irqsave(&pch->lock, flags);
+ spin_lock_irqsave(&pl330->lock, flags);
dma_cookie_init(chan);
pch->cyclic = false;
pch->thread = pl330_request_channel(pl330);
if (!pch->thread) {
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
return -ENOMEM;
}
tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
return 1;
}
@@ -2236,12 +2228,13 @@
static void pl330_free_chan_resources(struct dma_chan *chan)
{
struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
unsigned long flags;
tasklet_kill(&pch->task);
pm_runtime_get_sync(pch->dmac->ddma.dev);
- spin_lock_irqsave(&pch->lock, flags);
+ spin_lock_irqsave(&pl330->lock, flags);
pl330_release_channel(pch->thread);
pch->thread = NULL;
@@ -2249,7 +2242,7 @@
if (pch->cyclic)
list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
- spin_unlock_irqrestore(&pch->lock, flags);
+ spin_unlock_irqrestore(&pl330->lock, flags);
pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
}
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 307547f..ae3f60b 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -884,7 +884,7 @@
struct virt_dma_desc *vdesc;
enum dma_status status;
unsigned long flags;
- u32 residue;
+ u32 residue = 0;
status = dma_cookie_status(c, cookie, state);
if ((status == DMA_COMPLETE) || (!state))
@@ -892,16 +892,12 @@
spin_lock_irqsave(&chan->vchan.lock, flags);
vdesc = vchan_find_desc(&chan->vchan, cookie);
- if (cookie == chan->desc->vdesc.tx.cookie) {
+ if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
residue = stm32_dma_desc_residue(chan, chan->desc,
chan->next_sg);
- } else if (vdesc) {
+ else if (vdesc)
residue = stm32_dma_desc_residue(chan,
to_stm32_dma_desc(vdesc), 0);
- } else {
- residue = 0;
- }
-
dma_set_residue(state, residue);
spin_unlock_irqrestore(&chan->vchan.lock, flags);
@@ -976,21 +972,18 @@
struct stm32_dma_chan *chan;
struct dma_chan *c;
- if (dma_spec->args_count < 3)
+ if (dma_spec->args_count < 4)
return NULL;
cfg.channel_id = dma_spec->args[0];
cfg.request_line = dma_spec->args[1];
cfg.stream_config = dma_spec->args[2];
- cfg.threshold = 0;
+ cfg.threshold = dma_spec->args[3];
if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
STM32_DMA_MAX_REQUEST_ID))
return NULL;
- if (dma_spec->args_count > 3)
- cfg.threshold = dma_spec->args[3];
-
chan = &dmadev->chan[cfg.channel_id];
c = dma_get_slave_channel(&chan->vchan.chan);
diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 88a00d0..43e88d8 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -49,12 +49,12 @@
struct ti_am335x_xbar_map {
u16 dma_line;
- u16 mux_val;
+ u8 mux_val;
};
-static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val)
+static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val)
{
- writeb_relaxed(val & 0x1f, iomem + event);
+ writeb_relaxed(val, iomem + event);
}
static void ti_am335x_xbar_free(struct device *dev, void *route_data)
@@ -105,7 +105,7 @@
}
map->dma_line = (u16)dma_spec->args[0];
- map->mux_val = (u16)dma_spec->args[2];
+ map->mux_val = (u8)dma_spec->args[2];
dma_spec->args[2] = 0;
dma_spec->args_count = 2;
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 72e07e3..16e0eb5 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -227,7 +227,7 @@
#define NREC_RDWR(x) (((x)>>11) & 1)
#define NREC_RANK(x) (((x)>>8) & 0x7)
#define NRECMEMB 0xC0
-#define NREC_CAS(x) (((x)>>16) & 0xFFFFFF)
+#define NREC_CAS(x) (((x)>>16) & 0xFFF)
#define NREC_RAS(x) ((x) & 0x7FFF)
#define NRECFGLOG 0xC4
#define NREEECFBDA 0xC8
@@ -371,7 +371,7 @@
/* These registers are input ONLY if there was a
* Non-Recoverable Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
@@ -407,7 +407,7 @@
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
@@ -1293,7 +1293,7 @@
dimm->mtype = MEM_FB_DDR2;
/* ask what device type on this row */
- if (MTR_DRAM_WIDTH(mtr))
+ if (MTR_DRAM_WIDTH(mtr) == 8)
dimm->dtype = DEV_X8;
else
dimm->dtype = DEV_X4;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 6ef6ad1..2ea2f32 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -368,7 +368,7 @@
/* These registers are input ONLY if there was a Non-Rec Error */
u16 nrecmema; /* Non-Recoverable Mem log A */
- u16 nrecmemb; /* Non-Recoverable Mem log B */
+ u32 nrecmemb; /* Non-Recoverable Mem log B */
};
@@ -458,7 +458,7 @@
NERR_FAT_FBD, &info->nerr_fat_fbd);
pci_read_config_word(pvt->branchmap_werrors,
NRECMEMA, &info->nrecmema);
- pci_read_config_word(pvt->branchmap_werrors,
+ pci_read_config_dword(pvt->branchmap_werrors,
NRECMEMB, &info->nrecmemb);
/* Clear the error bits, by writing them back */
@@ -1207,13 +1207,14 @@
dimm->nr_pages = size_mb << 8;
dimm->grain = 8;
- dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4;
+ dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ?
+ DEV_X8 : DEV_X4;
dimm->mtype = MEM_FB_DDR2;
/*
* The eccc mechanism is SDDC (aka SECC), with
* is similar to Chipkill.
*/
- dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ?
+ dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ?
EDAC_S8ECD8ED : EDAC_S4ECD4ED;
ndimms++;
}
diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c
index cda6dab..6b65a10 100644
--- a/drivers/edac/octeon_edac-lmc.c
+++ b/drivers/edac/octeon_edac-lmc.c
@@ -79,6 +79,7 @@
if (!pvt->inject)
int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx));
else {
+ int_reg.u64 = 0;
if (pvt->error_type == 1)
int_reg.s.sec_err = 1;
if (pvt->error_type == 2)
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 5477522..3c47e63 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -2510,6 +2510,7 @@
break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
pvt->pci_ta = pdev;
+ break;
case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
pvt->pci_ras = pdev;
break;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index a4944e2..2f47c5b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -120,8 +120,7 @@
return str - buf;
}
-static struct kobj_attribute efi_attr_systab =
- __ATTR(systab, 0400, systab_show, NULL);
+static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400);
#define EFI_FIELD(var) efi.var
@@ -385,7 +384,6 @@
return 0;
}
}
- pr_err_once("requested map not found.\n");
return -ENOENT;
}
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 1491407..311c9d0 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -106,7 +106,7 @@
};
/* Generic ESRT Entry ("ESRE") support. */
-static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf)
+static ssize_t fw_class_show(struct esre_entry *entry, char *buf)
{
char *str = buf;
@@ -117,18 +117,16 @@
return str - buf;
}
-static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400,
- esre_fw_class_show, NULL);
+static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400);
#define esre_attr_decl(name, size, fmt) \
-static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \
+static ssize_t name##_show(struct esre_entry *entry, char *buf) \
{ \
return sprintf(buf, fmt "\n", \
le##size##_to_cpu(entry->esre.esre1->name)); \
} \
\
-static struct esre_attribute esre_##name = __ATTR(name, 0400, \
- esre_##name##_show, NULL)
+static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400)
esre_attr_decl(fw_type, 32, "%u");
esre_attr_decl(fw_version, 32, "%u");
@@ -193,14 +191,13 @@
/* support for displaying ESRT fields at the top level */
#define esrt_attr_decl(name, size, fmt) \
-static ssize_t esrt_##name##_show(struct kobject *kobj, \
+static ssize_t name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf)\
{ \
return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \
} \
\
-static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \
- esrt_##name##_show, NULL)
+static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400)
esrt_attr_decl(fw_resource_count, 32, "%u");
esrt_attr_decl(fw_resource_count_max, 32, "%u");
@@ -254,7 +251,7 @@
rc = efi_mem_desc_lookup(efi.esrt, &md);
if (rc < 0) {
- pr_err("ESRT header is not in the memory map.\n");
+ pr_warn("ESRT header is not in the memory map.\n");
return;
}
@@ -431,7 +428,7 @@
err_remove_esrt:
kobject_put(esrt_kobj);
err:
- kfree(esrt);
+ memunmap(esrt);
esrt = NULL;
return error;
}
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 8e64b77..f377609 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -63,11 +63,11 @@
return map_attr->show(entry, buf);
}
-static struct map_attribute map_type_attr = __ATTR_RO(type);
-static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr);
-static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr);
-static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages);
-static struct map_attribute map_attribute_attr = __ATTR_RO(attribute);
+static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400);
+static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400);
+static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400);
+static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400);
+static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400);
/*
* These are default attributes that are added for every memmap entry.
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 5bddbd5..3fe6a21 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -90,21 +90,18 @@
altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d));
- if (type == IRQ_TYPE_NONE)
+ if (type == IRQ_TYPE_NONE) {
+ irq_set_handler_locked(d, handle_bad_irq);
return 0;
- if (type == IRQ_TYPE_LEVEL_HIGH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
+ }
+ if (type == altera_gc->interrupt_trigger) {
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ irq_set_handler_locked(d, handle_level_irq);
+ else
+ irq_set_handler_locked(d, handle_simple_irq);
return 0;
- if (type == IRQ_TYPE_EDGE_RISING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING)
- return 0;
- if (type == IRQ_TYPE_EDGE_FALLING &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING)
- return 0;
- if (type == IRQ_TYPE_EDGE_BOTH &&
- altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH)
- return 0;
-
+ }
+ irq_set_handler_locked(d, handle_bad_irq);
return -EINVAL;
}
@@ -230,7 +227,6 @@
chained_irq_exit(chip, desc);
}
-
static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc)
{
struct altera_gpio_chip *altera_gc;
@@ -310,7 +306,7 @@
altera_gc->interrupt_trigger = reg;
ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
- handle_simple_irq, IRQ_TYPE_NONE);
+ handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(&pdev->dev, "could not add irqchip\n");
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index dc37dbe..a83e97e 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -323,3 +323,6 @@
};
module_platform_driver(ath79_gpio_driver);
+
+MODULE_DESCRIPTION("Atheros AR71XX/AR724X/AR913X GPIO API support");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpio/gpio-iop.c b/drivers/gpio/gpio-iop.c
index 98c7ff2..8d62db4 100644
--- a/drivers/gpio/gpio-iop.c
+++ b/drivers/gpio/gpio-iop.c
@@ -58,3 +58,7 @@
return platform_driver_register(&iop3xx_gpio_driver);
}
arch_initcall(iop3xx_gpio_init);
+
+MODULE_DESCRIPTION("GPIO handling for Intel IOP3xx processors");
+MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index adba614..abb5a27 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c
@@ -190,6 +190,16 @@
};
int i, j;
+ /*
+ * STMPE1600: to be able to get IRQ from pins,
+ * a read must be done on GPMR register, or a write in
+ * GPSR or GPCR registers
+ */
+ if (stmpe->partnum == STMPE1600) {
+ stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]);
+ stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]);
+ }
+
for (i = 0; i < CACHE_NR_REGS; i++) {
/* STMPE801 and STMPE1600 don't have RE and FE registers */
if ((stmpe->partnum == STMPE801 ||
@@ -227,21 +237,11 @@
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(gc);
- struct stmpe *stmpe = stmpe_gpio->stmpe;
int offset = d->hwirq;
int regoffset = offset / 8;
int mask = BIT(offset % 8);
stmpe_gpio->regs[REG_IE][regoffset] |= mask;
-
- /*
- * STMPE1600 workaround: to be able to get IRQ from pins,
- * a read must be done on GPMR register, or a write in
- * GPSR or GPCR registers
- */
- if (stmpe->partnum == STMPE1600)
- stmpe_reg_read(stmpe,
- stmpe->regs[STMPE_IDX_GPMR_LSB + regoffset]);
}
static void stmpe_dbg_show_one(struct seq_file *s,
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 063d176..f3c3680 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -705,6 +705,9 @@
struct gpioevent_data ge;
int ret, level;
+ /* Do not leak kernel stack to userspace */
+ memset(&ge, 0, sizeof(ge));
+
ge.timestamp = ktime_get_real_ns();
level = gpiod_get_value_cansleep(le->desc);
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
index 8363cb5..8a08e81 100644
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -3,6 +3,4 @@
# of AMDSOC/AMDGPU drm driver.
# It provides the HW control for ACP related functionalities.
-subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
-
AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7..47951f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -367,29 +367,50 @@
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
+ unsigned long end_jiffies;
uint32_t sdma_base_addr;
+ uint32_t data;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(m);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
- m->sdma_rlc_virtual_addr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
- m->sdma_rlc_rb_base);
-
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
- m->sdma_rlc_rb_base_hi);
-
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
- m->sdma_rlc_rb_rptr_addr_lo);
-
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
- m->sdma_rlc_rb_rptr_addr_hi);
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ if (m->sdma_engine_id) {
+ data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+ data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+ } else {
+ data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+ }
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
- m->sdma_rlc_doorbell);
-
+ m->sdma_rlc_doorbell);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdma_rlc_virtual_addr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdma_rlc_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdma_rlc_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdma_rlc_rb_rptr_addr_hi);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
m->sdma_rlc_rb_cntl);
@@ -493,9 +514,9 @@
}
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index f8fdbd1..26afdff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1788,34 +1788,32 @@
WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
}
-/* Atom needs data in little endian format
- * so swap as appropriate when copying data to
- * or from atom. Note that atom operates on
- * dw units.
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
*/
void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
{
#ifdef __BIG_ENDIAN
- u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
- u32 *dst32, *src32;
+ u32 src_tmp[5], dst_tmp[5];
int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
- memcpy(src_tmp, src, num_bytes);
- src32 = (u32 *)src_tmp;
- dst32 = (u32 *)dst_tmp;
if (to_le) {
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = cpu_to_le32(src32[i]);
- memcpy(dst, dst_tmp, num_bytes);
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
} else {
- u8 dws = num_bytes & ~3;
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = le32_to_cpu(src32[i]);
- memcpy(dst, dst_tmp, dws);
- if (num_bytes % 4) {
- for (i = 0; i < (num_bytes % 4); i++)
- dst[dws+i] = dst_tmp[dws+i];
- }
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
}
#else
memcpy(dst, src, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index bfb4b91..cb505f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -240,6 +240,8 @@
for (; i >= 0; i--)
drm_free_large(p->chunks[i].kdata);
kfree(p->chunks);
+ p->chunks = NULL;
+ p->nchunks = 0;
put_ctx:
amdgpu_ctx_put(p->ctx);
free_chunk:
@@ -414,6 +416,10 @@
if (candidate == lobj)
break;
+ /* We can't move pinned BOs here */
+ if (bo->pin_count)
+ continue;
+
other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
/* Check if this BO is in one of the domains we need space for */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e41d4ba..ce9797b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2020,8 +2020,11 @@
}
r = amdgpu_late_init(adev);
- if (r)
+ if (r) {
+ if (fbcon)
+ console_unlock();
return r;
+ }
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 743a12d..3bb2b9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -648,7 +648,7 @@
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
uint32_t *size = &tmp;
- int i, r, idx = 0;
+ int i, r = 0, idx = 0;
r = amdgpu_cs_sysvm_access_required(p);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 968c426..4750375 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -744,7 +744,7 @@
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- int r;
+ int r = 0;
r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index c2bd9f0..6d75fd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -565,11 +565,8 @@
static void dce_virtual_encoder_destroy(struct drm_encoder *encoder)
{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
- kfree(amdgpu_encoder->enc_priv);
drm_encoder_cleanup(encoder);
- kfree(amdgpu_encoder);
+ kfree(encoder);
}
static const struct drm_encoder_funcs dce_virtual_encoder_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 50f0cf2..7522f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -182,7 +182,7 @@
WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
- data &= ~0xffc00000;
+ data &= ~0x3ff;
WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index d83de98..8577a56 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -215,8 +215,8 @@
BUG_ON(!mm || !mqd || !q);
m = get_sdma_mqd(mqd);
- m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
- SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e1fb40b..5425c68 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -205,6 +205,24 @@
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
+ if (dev->dqm->queue_count >=
+ CIK_SDMA_QUEUES_PER_ENGINE * CIK_SDMA_ENGINE_NUM) {
+ pr_err("Over-subscription is not allowed for SDMA.\n");
+ retval = -EPERM;
+ goto err_create_queue;
+ }
+
+ retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+ if (retval != 0)
+ goto err_create_queue;
+ pqn->q = q;
+ pqn->kq = NULL;
+ retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
+ &q->properties.vmid);
+ pr_debug("DQM returned %d for create_queue\n", retval);
+ print_queue(q);
+ break;
+
case KFD_QUEUE_TYPE_COMPUTE:
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index 4477c55..a8b59b3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -850,9 +850,9 @@
const ATOM_Tonga_POWERPLAYTABLE *powerplay_table)
{
hwmgr->platform_descriptor.overdriveLimit.engineClock =
- le16_to_cpu(powerplay_table->ulMaxODEngineClock);
+ le32_to_cpu(powerplay_table->ulMaxODEngineClock);
hwmgr->platform_descriptor.overdriveLimit.memoryClock =
- le16_to_cpu(powerplay_table->ulMaxODMemoryClock);
+ le32_to_cpu(powerplay_table->ulMaxODMemoryClock);
hwmgr->platform_descriptor.minOverdriveVDDC = 0;
hwmgr->platform_descriptor.maxOverdriveVDDC = 0;
diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile
index 26412d2..ffd6736 100644
--- a/drivers/gpu/drm/armada/Makefile
+++ b/drivers/gpu/drm/armada/Makefile
@@ -4,5 +4,3 @@
armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o
obj-$(CONFIG_DRM_ARMADA) := armada.o
-
-CFLAGS_armada_trace.o := -I$(src)
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 44d476e..f64f35c 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -97,7 +97,7 @@
#define DP0_ACTIVEVAL 0x0650
#define DP0_SYNCVAL 0x0654
#define DP0_MISC 0x0658
-#define TU_SIZE_RECOMMENDED (0x3f << 16) /* LSCLK cycles per TU */
+#define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */
#define BPC_6 (0 << 5)
#define BPC_8 (1 << 5)
@@ -318,7 +318,7 @@
tmp = (tmp << 8) | buf[i];
i++;
if (((i % 4) == 0) || (i == size)) {
- tc_write(DP0_AUXWDATA(i >> 2), tmp);
+ tc_write(DP0_AUXWDATA((i - 1) >> 2), tmp);
tmp = 0;
}
}
@@ -603,8 +603,15 @@
ret = drm_dp_link_probe(&tc->aux, &tc->link.base);
if (ret < 0)
goto err_dpcd_read;
- if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000))
- goto err_dpcd_inval;
+ if (tc->link.base.rate != 162000 && tc->link.base.rate != 270000) {
+ dev_dbg(tc->dev, "Falling to 2.7 Gbps rate\n");
+ tc->link.base.rate = 270000;
+ }
+
+ if (tc->link.base.num_lanes > 2) {
+ dev_dbg(tc->dev, "Falling to 2 lanes\n");
+ tc->link.base.num_lanes = 2;
+ }
ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp);
if (ret < 0)
@@ -637,9 +644,6 @@
err_dpcd_read:
dev_err(tc->dev, "failed to read DPCD: %d\n", ret);
return ret;
-err_dpcd_inval:
- dev_err(tc->dev, "invalid DPCD\n");
- return -EINVAL;
}
static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode)
@@ -655,6 +659,14 @@
int lower_margin = mode->vsync_start - mode->vdisplay;
int vsync_len = mode->vsync_end - mode->vsync_start;
+ /*
+ * Recommended maximum number of symbols transferred in a transfer unit:
+ * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size,
+ * (output active video bandwidth in bytes))
+ * Must be less than tu_size.
+ */
+ max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+
dev_dbg(tc->dev, "set mode %dx%d\n",
mode->hdisplay, mode->vdisplay);
dev_dbg(tc->dev, "H margin %d,%d sync %d\n",
@@ -664,13 +676,18 @@
dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal);
- /* LCD Ctl Frame Size */
- tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ |
+ /*
+ * LCD Ctl Frame Size
+ * datasheet is not clear of vsdelay in case of DPI
+ * assume we do not need any delay when DPI is a source of
+ * sync signals
+ */
+ tc_write(VPCTRL0, (0 << 20) /* VSDELAY */ |
OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED);
- tc_write(HTIM01, (left_margin << 16) | /* H back porch */
- (hsync_len << 0)); /* Hsync */
- tc_write(HTIM02, (right_margin << 16) | /* H front porch */
- (mode->hdisplay << 0)); /* width */
+ tc_write(HTIM01, (ALIGN(left_margin, 2) << 16) | /* H back porch */
+ (ALIGN(hsync_len, 2) << 0)); /* Hsync */
+ tc_write(HTIM02, (ALIGN(right_margin, 2) << 16) | /* H front porch */
+ (ALIGN(mode->hdisplay, 2) << 0)); /* width */
tc_write(VTIM01, (upper_margin << 16) | /* V back porch */
(vsync_len << 0)); /* Vsync */
tc_write(VTIM02, (lower_margin << 16) | /* V front porch */
@@ -689,7 +706,7 @@
/* DP Main Stream Attributes */
vid_sync_dly = hsync_len + left_margin + mode->hdisplay;
tc_write(DP0_VIDSYNCDELAY,
- (0x003e << 16) | /* thresh_dly */
+ (max_tu_symbol << 16) | /* thresh_dly */
(vid_sync_dly << 0));
tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal));
@@ -705,14 +722,8 @@
tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW |
DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888);
- /*
- * Recommended maximum number of symbols transferred in a transfer unit:
- * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size,
- * (output active video bandwidth in bytes))
- * Must be less than tu_size.
- */
- max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
- tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8);
+ tc_write(DP0_MISC, (max_tu_symbol << 23) | (TU_SIZE_RECOMMENDED << 16) |
+ BPC_8);
return 0;
err:
@@ -808,8 +819,6 @@
unsigned int rate;
u32 dp_phy_ctrl;
int timeout;
- bool aligned;
- bool ready;
u32 value;
int ret;
u8 tmp[8];
@@ -954,16 +963,15 @@
ret = drm_dp_dpcd_read_link_status(aux, tmp + 2);
if (ret < 0)
goto err_dpcd_read;
- ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */
- DP_CHANNEL_EQ_BITS)); /* Lane0 */
- aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE;
- } while ((--timeout) && !(ready && aligned));
+ } while ((--timeout) &&
+ !(drm_dp_channel_eq_ok(tmp + 2, tc->link.base.num_lanes)));
if (timeout == 0) {
/* Read DPCD 0x200-0x201 */
ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2);
if (ret < 0)
goto err_dpcd_read;
+ dev_err(dev, "channel(s) EQ not ok\n");
dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]);
dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n",
tmp[1]);
@@ -974,10 +982,6 @@
dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n",
tmp[6]);
- if (!ready)
- dev_err(dev, "Lane0/1 not ready\n");
- if (!aligned)
- dev_err(dev, "Lane0/1 not aligned\n");
return -EAGAIN;
}
@@ -1105,7 +1109,10 @@
static int tc_connector_mode_valid(struct drm_connector *connector,
struct drm_display_mode *mode)
{
- /* Accept any mode */
+ /* DPI interface clock limitation: upto 154 MHz */
+ if (mode->clock > 154000)
+ return MODE_CLOCK_HIGH;
+
return MODE_OK;
}
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 6ca1f31..6dd09c3 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -46,7 +46,8 @@
BIT_CLKS_ENABLED,
BIT_IRQS_ENABLED,
BIT_WIN_UPDATED,
- BIT_SUSPENDED
+ BIT_SUSPENDED,
+ BIT_REQUEST_UPDATE
};
struct decon_context {
@@ -315,6 +316,7 @@
/* window enable */
decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0);
+ set_bit(BIT_REQUEST_UPDATE, &ctx->flags);
}
static void decon_disable_plane(struct exynos_drm_crtc *crtc,
@@ -327,6 +329,7 @@
return;
decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, 0);
+ set_bit(BIT_REQUEST_UPDATE, &ctx->flags);
}
static void decon_atomic_flush(struct exynos_drm_crtc *crtc)
@@ -340,8 +343,8 @@
for (i = ctx->first_win; i < WINDOWS_NR; i++)
decon_shadow_protect_win(ctx, i, false);
- /* standalone update */
- decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0);
+ if (test_and_clear_bit(BIT_REQUEST_UPDATE, &ctx->flags))
+ decon_set_bits(ctx, DECON_UPDATE, STANDALONE_UPDATE_F, ~0);
if (ctx->out_type & IFTYPE_I80)
set_bit(BIT_WIN_UPDATED, &ctx->flags);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index f2ae72b..2abc47b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -246,6 +246,15 @@
if (IS_ERR(exynos_gem))
return exynos_gem;
+ if (!is_drm_iommu_supported(dev) && (flags & EXYNOS_BO_NONCONTIG)) {
+ /*
+ * when no IOMMU is available, all allocated buffers are
+ * contiguous anyway, so drop EXYNOS_BO_NONCONTIG flag
+ */
+ flags &= ~EXYNOS_BO_NONCONTIG;
+ DRM_WARN("Non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n");
+ }
+
/* set memory type and cache attribute from user side. */
exynos_gem->flags = flags;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index cc2fde2..c9eef0f 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -243,7 +243,6 @@
return PTR_ERR(fsl_dev->state);
}
- clk_disable_unprepare(fsl_dev->pix_clk);
clk_disable_unprepare(fsl_dev->clk);
return 0;
@@ -266,6 +265,7 @@
if (fsl_dev->tcon)
fsl_tcon_bypass_enable(fsl_dev->tcon);
fsl_dcu_drm_init_planes(fsl_dev->drm);
+ enable_irq(fsl_dev->irq);
drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state);
console_lock();
@@ -273,7 +273,6 @@
console_unlock();
drm_kms_helper_poll_enable(fsl_dev->drm);
- enable_irq(fsl_dev->irq);
return 0;
}
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index 7e7a4d4..0f563c9 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -521,9 +521,12 @@
{
struct ade_crtc *acrtc = to_ade_crtc(crtc);
struct ade_hw_ctx *ctx = acrtc->ctx;
+ struct drm_display_mode *mode = &crtc->state->mode;
+ struct drm_display_mode *adj_mode = &crtc->state->adjusted_mode;
if (!ctx->power_on)
(void)ade_power_up(ctx);
+ ade_ldi_set_mode(acrtc, mode, adj_mode);
}
static void ade_crtc_atomic_flush(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 7fdc42e..74163a9 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5063,6 +5063,12 @@
*/
final->t8 = 1;
final->t9 = 1;
+
+ /*
+ * HW has only a 100msec granularity for t11_t12 so round it up
+ * accordingly.
+ */
+ final->t11_t12 = roundup(final->t11_t12, 100 * 10);
}
static void
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 79aab9a..6769aa1 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -430,7 +430,9 @@
gmbus_is_index_read(struct i2c_msg *msgs, int i, int num)
{
return (i + 1 < num &&
- !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 &&
+ msgs[i].addr == msgs[i + 1].addr &&
+ !(msgs[i].flags & I2C_M_RD) &&
+ (msgs[i].len == 1 || msgs[i].len == 2) &&
(msgs[i + 1].flags & I2C_M_RD));
}
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index 4ceed7a9..4b83e9e 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -638,7 +638,8 @@
match = of_match_node(dmm_of_match, dev->dev.of_node);
if (!match) {
dev_err(&dev->dev, "failed to find matching device node\n");
- return -ENODEV;
+ ret = -ENODEV;
+ goto fail;
}
omap_dmm->plat_data = match->data;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index af267c3..ee5883f 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -147,9 +147,6 @@
struct drm_gem_object *obj = buffer->priv;
int ret = 0;
- if (WARN_ON(!obj->filp))
- return -EINVAL;
-
ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 27cb424..6f65846 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -369,6 +369,7 @@
drm_panel_remove(&panel->base);
panel_simple_disable(&panel->base);
+ panel_simple_unprepare(&panel->base);
if (panel->ddc)
put_device(&panel->ddc->dev);
@@ -384,6 +385,7 @@
struct panel_simple *panel = dev_get_drvdata(dev);
panel_simple_disable(&panel->base);
+ panel_simple_unprepare(&panel->base);
}
static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = {
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 432cb46..fd7682b 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -45,34 +45,32 @@
/***** radeon AUX functions *****/
-/* Atom needs data in little endian format
- * so swap as appropriate when copying data to
- * or from atom. Note that atom operates on
- * dw units.
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * byes in the src buffer.
*/
void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
{
#ifdef __BIG_ENDIAN
- u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
- u32 *dst32, *src32;
+ u32 src_tmp[5], dst_tmp[5];
int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
- memcpy(src_tmp, src, num_bytes);
- src32 = (u32 *)src_tmp;
- dst32 = (u32 *)dst_tmp;
if (to_le) {
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = cpu_to_le32(src32[i]);
- memcpy(dst, dst_tmp, num_bytes);
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
} else {
- u8 dws = num_bytes & ~3;
- for (i = 0; i < ((num_bytes + 3) / 4); i++)
- dst32[i] = le32_to_cpu(src32[i]);
- memcpy(dst, dst_tmp, dws);
- if (num_bytes % 4) {
- for (i = 0; i < (num_bytes % 4); i++)
- dst[dws+i] = dst_tmp[dws+i];
- }
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
}
#else
memcpy(dst, src, num_bytes);
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 0daad44..af84705 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -252,7 +252,6 @@
}
info->par = rfbdev;
- info->skip_vt_switch = true;
ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
if (ret) {
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 13ba73f..8bd9e6c 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3029,6 +3029,16 @@
max_sclk = 75000;
max_mclk = 80000;
}
+ } else if (rdev->family == CHIP_OLAND) {
+ if ((rdev->pdev->revision == 0xC7) ||
+ (rdev->pdev->revision == 0x80) ||
+ (rdev->pdev->revision == 0x81) ||
+ (rdev->pdev->revision == 0x83) ||
+ (rdev->pdev->revision == 0x87) ||
+ (rdev->pdev->device == 0x6604) ||
+ (rdev->pdev->device == 0x6605)) {
+ max_sclk = 75000;
+ }
}
/* Apply dpm quirks */
while (p && p->chip_device != 0) {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index a2ec6d8..3322b15 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -392,6 +392,31 @@
rcrtc->started = true;
}
+static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc)
+{
+ struct rcar_du_device *rcdu = rcrtc->group->dev;
+ struct drm_crtc *crtc = &rcrtc->crtc;
+ u32 status;
+ /* Make sure vblank interrupts are enabled. */
+ drm_crtc_vblank_get(crtc);
+ /*
+ * Disable planes and calculate how many vertical blanking interrupts we
+ * have to wait for. If a vertical blanking interrupt has been triggered
+ * but not processed yet, we don't know whether it occurred before or
+ * after the planes got disabled. We thus have to wait for two vblank
+ * interrupts in that case.
+ */
+ spin_lock_irq(&rcrtc->vblank_lock);
+ rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0);
+ status = rcar_du_crtc_read(rcrtc, DSSR);
+ rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1;
+ spin_unlock_irq(&rcrtc->vblank_lock);
+ if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0,
+ msecs_to_jiffies(100)))
+ dev_warn(rcdu->dev, "vertical blanking timeout\n");
+ drm_crtc_vblank_put(crtc);
+}
+
static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
{
struct drm_crtc *crtc = &rcrtc->crtc;
@@ -400,17 +425,16 @@
return;
/* Disable all planes and wait for the change to take effect. This is
- * required as the DSnPR registers are updated on vblank, and no vblank
- * will occur once the CRTC is stopped. Disabling planes when starting
- * the CRTC thus wouldn't be enough as it would start scanning out
- * immediately from old frame buffers until the next vblank.
+ * required as the plane enable registers are updated on vblank, and no
+ * vblank will occur once the CRTC is stopped. Disabling planes when
+ * starting the CRTC thus wouldn't be enough as it would start scanning
+ * out immediately from old frame buffers until the next vblank.
*
* This increases the CRTC stop delay, especially when multiple CRTCs
* are stopped in one operation as we now wait for one vblank per CRTC.
* Whether this can be improved needs to be researched.
*/
- rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0);
- drm_crtc_wait_one_vblank(crtc);
+ rcar_du_crtc_disable_planes(rcrtc);
/* Disable vertical blanking interrupt reporting. We first need to wait
* for page flip completion before stopping the CRTC as userspace
@@ -548,10 +572,25 @@
irqreturn_t ret = IRQ_NONE;
u32 status;
+ spin_lock(&rcrtc->vblank_lock);
+
status = rcar_du_crtc_read(rcrtc, DSSR);
rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
- if (status & DSSR_FRM) {
+ if (status & DSSR_VBK) {
+ /*
+ * Wake up the vblank wait if the counter reaches 0. This must
+ * be protected by the vblank_lock to avoid races in
+ * rcar_du_crtc_disable_planes().
+ */
+ if (rcrtc->vblank_count) {
+ if (--rcrtc->vblank_count == 0)
+ wake_up(&rcrtc->vblank_wait);
+ }
+ }
+ spin_unlock(&rcrtc->vblank_lock);
+
+ if (status & DSSR_VBK) {
drm_crtc_handle_vblank(&rcrtc->crtc);
rcar_du_crtc_finish_page_flip(rcrtc);
ret = IRQ_HANDLED;
@@ -606,6 +645,8 @@
}
init_waitqueue_head(&rcrtc->flip_wait);
+ init_waitqueue_head(&rcrtc->vblank_wait);
+ spin_lock_init(&rcrtc->vblank_lock);
rcrtc->group = rgrp;
rcrtc->mmio_offset = mmio_offsets[index];
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index 6f08b7e..48bef05 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -15,6 +15,7 @@
#define __RCAR_DU_CRTC_H__
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/wait.h>
#include <drm/drmP.h>
@@ -33,6 +34,9 @@
* @started: whether the CRTC has been started and is running
* @event: event to post when the pending page flip completes
* @flip_wait: wait queue used to signal page flip completion
+ * @vblank_lock: protects vblank_wait and vblank_count
+ * @vblank_wait: wait queue used to signal vertical blanking
+ * @vblank_count: number of vertical blanking interrupts to wait for
* @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC
* @group: CRTC group this CRTC belongs to
*/
@@ -48,6 +52,10 @@
struct drm_pending_vblank_event *event;
wait_queue_head_t flip_wait;
+ spinlock_t vblank_lock;
+ wait_queue_head_t vblank_wait;
+ unsigned int vblank_count;
+
unsigned int outputs;
struct rcar_du_group *group;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index bf6e216..7d22f98 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -473,6 +473,7 @@
INIT_LIST_HEAD(&fbo->lru);
INIT_LIST_HEAD(&fbo->swap);
INIT_LIST_HEAD(&fbo->io_reserve_lru);
+ mutex_init(&fbo->wu_mutex);
fbo->moving = NULL;
drm_vma_node_reset(&fbo->vma_node);
atomic_set(&fbo->cpu_writers, 0);
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 094bc6a..d96c084 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -225,6 +225,9 @@
/* Clear any pending interrupts we might have left. */
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+ /* Finish any interrupt handler still in flight. */
+ disable_irq(dev->irq);
+
cancel_work_sync(&vc4->overflow_mem_work);
}
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 7cc346a..ce7c21d 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -173,6 +173,9 @@
struct vc4_dev *vc4 = v3d->vc4;
vc4_v3d_init_hw(vc4->dev);
+
+ /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+ enable_irq(vc4->dev->irq);
vc4_irq_postinstall(vc4->dev);
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index fefb9d9..81f5a55 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2729,6 +2729,8 @@
}
view_type = vmw_view_cmd_to_type(header->id);
+ if (view_type == vmw_view_max)
+ return -EINVAL;
cmd = container_of(header, typeof(*cmd), header);
ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
user_surface_converter,
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index cd4599c..8eed456 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -175,11 +175,11 @@
Support for Cherry Cymotion keyboard.
config HID_CHICONY
- tristate "Chicony Tactical pad"
+ tristate "Chicony devices"
depends on HID
default !EXPERT
---help---
- Support for Chicony Tactical pad.
+ Support for Chicony Tactical pad and special keys on Chicony keyboards.
config HID_CORSAIR
tristate "Corsair devices"
@@ -190,6 +190,7 @@
Supported devices:
- Vengeance K90
+ - Scimitar PRO RGB
config HID_PRODIKEYS
tristate "Prodikeys PC-MIDI Keyboard support"
diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index bc3cec1..f04ed9a 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c
@@ -86,6 +86,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ }
};
MODULE_DEVICE_TABLE(hid, ch_devices);
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 4f3f574..03cac57 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1872,6 +1872,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
@@ -1906,6 +1907,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
@@ -2105,6 +2107,7 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
@@ -2362,7 +2365,6 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EARTHMATE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DELORME, USB_DEVICE_ID_DELORME_EM_LT20) },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0400) },
- { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, 0x0401) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ESSENTIAL_REALITY, USB_DEVICE_ID_ESSENTIAL_REALITY_P5) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC5UH) },
{ HID_USB_DEVICE(USB_VENDOR_ID_ETT, USB_DEVICE_ID_TC4UM) },
@@ -2632,6 +2634,17 @@
strncmp(hdev->name, "www.masterkit.ru MA901", 22) == 0)
return true;
break;
+ case USB_VENDOR_ID_ELAN:
+ /*
+ * Many Elan devices have a product id of 0x0401 and are handled
+ * by the elan_i2c input driver. But the ACPI HID ELAN0800 dev
+ * is not (and cannot be) handled by that driver ->
+ * Ignore all 0x0401 devs except for the ELAN0800 dev.
+ */
+ if (hdev->product == 0x0401 &&
+ strncmp(hdev->name, "ELAN0800", 8) != 0)
+ return true;
+ break;
}
if (hdev->type == HID_TYPE_USBMOUSE &&
diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c
index c0303f6..9ba5d98 100644
--- a/drivers/hid/hid-corsair.c
+++ b/drivers/hid/hid-corsair.c
@@ -3,8 +3,10 @@
*
* Supported devices:
* - Vengeance K90 Keyboard
+ * - Scimitar PRO RGB Gaming Mouse
*
* Copyright (c) 2015 Clement Vuchener
+ * Copyright (c) 2017 Oscar Campos
*/
/*
@@ -670,10 +672,51 @@
return 0;
}
+/*
+ * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is
+ * non parseable as they define two consecutive Logical Minimum for
+ * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16
+ * that should be obviousy 0x26 for Logical Magimum of 16 bits. This
+ * prevents poper parsing of the report descriptor due Logical
+ * Minimum being larger than Logical Maximum.
+ *
+ * This driver fixes the report descriptor for:
+ * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse
+ */
+
+static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+ unsigned int *rsize)
+{
+ struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
+
+ if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
+ /*
+ * Corsair Scimitar RGB Pro report descriptor is broken and
+ * defines two different Logical Minimum for the Consumer
+ * Application. The byte 77 should be a 0x26 defining a 16
+ * bits integer for the Logical Maximum but it is a 0x16
+ * instead (Logical Minimum)
+ */
+ switch (hdev->product) {
+ case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB:
+ if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16
+ && rdesc[78] == 0xff && rdesc[79] == 0x0f) {
+ hid_info(hdev, "Fixing up report descriptor\n");
+ rdesc[77] = 0x26;
+ }
+ break;
+ }
+
+ }
+ return rdesc;
+}
+
static const struct hid_device_id corsair_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90),
.driver_data = CORSAIR_USE_K90_MACRO |
CORSAIR_USE_K90_BACKLIGHT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR,
+ USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
{}
};
@@ -686,10 +729,14 @@
.event = corsair_event,
.remove = corsair_remove,
.input_mapping = corsair_input_mapping,
+ .report_fixup = corsair_mouse_report_fixup,
};
module_hid_driver(corsair_driver);
MODULE_LICENSE("GPL");
+/* Original K90 driver author */
MODULE_AUTHOR("Clement Vuchener");
+/* Scimitar PRO RGB driver author */
+MODULE_AUTHOR("Oscar Campos");
MODULE_DESCRIPTION("HID driver for Corsair devices");
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index e06c134..7af7781 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -188,6 +188,8 @@
HID_REQ_GET_REPORT);
if (ret != CP2112_GPIO_CONFIG_LENGTH) {
hid_err(hdev, "error requesting GPIO config: %d\n", ret);
+ if (ret >= 0)
+ ret = -EIO;
goto exit;
}
@@ -197,8 +199,10 @@
ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
HID_REQ_SET_REPORT);
- if (ret < 0) {
+ if (ret != CP2112_GPIO_CONFIG_LENGTH) {
hid_err(hdev, "error setting GPIO config: %d\n", ret);
+ if (ret >= 0)
+ ret = -EIO;
goto exit;
}
@@ -206,7 +210,7 @@
exit:
mutex_unlock(&dev->lock);
- return ret < 0 ? ret : -EIO;
+ return ret;
}
static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 08fd3f8..244b97c 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -277,6 +277,9 @@
#define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13
#define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15
#define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17
+#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38
+#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39
+#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e
#define USB_VENDOR_ID_CREATIVELABS 0x041e
#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c
@@ -558,6 +561,7 @@
#define USB_VENDOR_ID_JESS 0x0c45
#define USB_DEVICE_ID_JESS_YUREX 0x1010
+#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112
#define USB_VENDOR_ID_JESS2 0x0f30
#define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
@@ -1076,6 +1080,7 @@
#define USB_VENDOR_ID_XIN_MO 0x16c0
#define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE 0x05e1
+#define USB_DEVICE_ID_THT_2P_ARCADE 0x75e1
#define USB_VENDOR_ID_XIROKU 0x1477
#define USB_DEVICE_ID_XIROKU_SPX 0x1006
diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c
index 7df5227..9ad7731 100644
--- a/drivers/hid/hid-xinmo.c
+++ b/drivers/hid/hid-xinmo.c
@@ -46,6 +46,7 @@
static const struct hid_device_id xinmo_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
{ }
};
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 2b16207..1916f80 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -80,6 +80,9 @@
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index d72dfb2..7a4d39c 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2192,23 +2192,23 @@
int i;
unsigned long flags;
- spin_lock_irqsave(&remote->remote_lock, flags);
- remote->remotes[index].registered = false;
- spin_unlock_irqrestore(&remote->remote_lock, flags);
-
- if (remote->remotes[index].battery.battery)
- devres_release_group(&wacom->hdev->dev,
- &remote->remotes[index].battery.bat_desc);
-
- if (remote->remotes[index].group.name)
- devres_release_group(&wacom->hdev->dev,
- &remote->remotes[index]);
-
for (i = 0; i < WACOM_MAX_REMOTES; i++) {
if (remote->remotes[i].serial == serial) {
+
+ spin_lock_irqsave(&remote->remote_lock, flags);
+ remote->remotes[i].registered = false;
+ spin_unlock_irqrestore(&remote->remote_lock, flags);
+
+ if (remote->remotes[i].battery.battery)
+ devres_release_group(&wacom->hdev->dev,
+ &remote->remotes[i].battery.bat_desc);
+
+ if (remote->remotes[i].group.name)
+ devres_release_group(&wacom->hdev->dev,
+ &remote->remotes[i]);
+
remote->remotes[i].serial = 0;
remote->remotes[i].group.name = NULL;
- remote->remotes[i].registered = false;
remote->remotes[i].battery.battery = NULL;
wacom->led.groups[i].select = WACOM_STATUS_UNKNOWN;
}
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index e0a8216..13c32eb4 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -31,6 +31,7 @@
#include <linux/clockchips.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
+#include <asm/nospec-branch.h>
#include "hyperv_vmbus.h"
/* The one and only */
@@ -103,9 +104,10 @@
return (u64)ULLONG_MAX;
__asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
+ __asm__ __volatile__(CALL_NOSPEC :
+ "=a" (hv_status) :
"c" (control), "d" (input_address),
- "m" (hypercall_page));
+ THUNK_TARGET(hypercall_page));
return hv_status;
@@ -123,11 +125,12 @@
if (!hypercall_page)
return (u64)ULLONG_MAX;
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
+ __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
"=a"(hv_status_lo) : "d" (control_hi),
"a" (control_lo), "b" (input_address_hi),
"c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_page));
+ "S"(output_address_lo),
+ THUNK_TARGET(hypercall_page));
return hv_status_lo | ((u64)hv_status_hi << 32);
#endif /* !x86_64 */
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 75126e4..4442007 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -61,7 +61,6 @@
static const char fcopy_devname[] = "vmbus/hv_fcopy";
static u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static struct completion release_event;
/*
* This state maintains the version number registered by the daemon.
*/
@@ -322,7 +321,6 @@
if (cancel_delayed_work_sync(&fcopy_timeout_work))
fcopy_respond_to_host(HV_E_FAIL);
- complete(&release_event);
}
int hv_fcopy_init(struct hv_util_service *srv)
@@ -330,7 +328,6 @@
recv_buffer = srv->recv_buffer;
fcopy_transaction.recv_channel = srv->channel;
- init_completion(&release_event);
/*
* When this driver loads, the user level daemon that
* processes the host requests may not yet be running.
@@ -352,5 +349,4 @@
fcopy_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&fcopy_timeout_work);
hvutil_transport_destroy(hvt);
- wait_for_completion(&release_event);
}
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 3abfc59..5e1fdc8 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -88,7 +88,6 @@
static const char kvp_devname[] = "vmbus/hv_kvp";
static u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static struct completion release_event;
/*
* Register the kernel component with the user-level daemon.
* As part of this registration, pass the LIC version number.
@@ -717,7 +716,6 @@
if (cancel_delayed_work_sync(&kvp_timeout_work))
kvp_respond_to_host(NULL, HV_E_FAIL);
kvp_transaction.state = HVUTIL_DEVICE_INIT;
- complete(&release_event);
}
int
@@ -726,7 +724,6 @@
recv_buffer = srv->recv_buffer;
kvp_transaction.recv_channel = srv->channel;
- init_completion(&release_event);
/*
* When this driver loads, the user level daemon that
* processes the host requests may not yet be running.
@@ -750,5 +747,4 @@
cancel_delayed_work_sync(&kvp_timeout_work);
cancel_work_sync(&kvp_sendkey_work);
hvutil_transport_destroy(hvt);
- wait_for_completion(&release_event);
}
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index a76e3db..a670713 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -66,7 +66,6 @@
static const char vss_devname[] = "vmbus/hv_vss";
static __u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static struct completion release_event;
static void vss_timeout_func(struct work_struct *dummy);
static void vss_handle_request(struct work_struct *dummy);
@@ -331,13 +330,11 @@
if (cancel_delayed_work_sync(&vss_timeout_work))
vss_respond_to_host(HV_E_FAIL);
vss_transaction.state = HVUTIL_DEVICE_INIT;
- complete(&release_event);
}
int
hv_vss_init(struct hv_util_service *srv)
{
- init_completion(&release_event);
if (vmbus_proto_version < VERSION_WIN8_1) {
pr_warn("Integration service 'Backup (volume snapshot)'"
" not supported on this host version.\n");
@@ -368,5 +365,4 @@
cancel_delayed_work_sync(&vss_timeout_work);
cancel_work_sync(&vss_handle_request_work);
hvutil_transport_destroy(hvt);
- wait_for_completion(&release_event);
}
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index c235a95..4402a71 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -182,10 +182,11 @@
* connects back.
*/
hvt_reset(hvt);
- mutex_unlock(&hvt->lock);
if (mode_old == HVUTIL_TRANSPORT_DESTROY)
- hvt_transport_free(hvt);
+ complete(&hvt->release);
+
+ mutex_unlock(&hvt->lock);
return 0;
}
@@ -304,6 +305,7 @@
init_waitqueue_head(&hvt->outmsg_q);
mutex_init(&hvt->lock);
+ init_completion(&hvt->release);
spin_lock(&hvt_list_lock);
list_add(&hvt->list, &hvt_list);
@@ -351,6 +353,8 @@
if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0)
cn_del_callback(&hvt->cn_id);
- if (mode_old != HVUTIL_TRANSPORT_CHARDEV)
- hvt_transport_free(hvt);
+ if (mode_old == HVUTIL_TRANSPORT_CHARDEV)
+ wait_for_completion(&hvt->release);
+
+ hvt_transport_free(hvt);
}
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index d98f522..79afb62 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -41,6 +41,7 @@
int outmsg_len; /* its length */
wait_queue_head_t outmsg_q; /* poll/read wait queue */
struct mutex lock; /* protects struct members */
+ struct completion release; /* synchronize with fd release */
};
struct hvutil_transport *hvutil_transport_init(const char *name,
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index cccef87..975c43d 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -646,6 +646,9 @@
else
err = atk_read_value_new(sensor, value);
+ if (err)
+ return err;
+
sensor->is_valid = true;
sensor->last_updated = jiffies;
sensor->cached_value = *value;
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 1bf22ef..0f1f642 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -22,6 +22,7 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -45,6 +46,7 @@
#define JC42_REG_TEMP 0x05
#define JC42_REG_MANID 0x06
#define JC42_REG_DEVICEID 0x07
+#define JC42_REG_SMBUS 0x22 /* NXP and Atmel, possibly others? */
/* Status bits in temperature register */
#define JC42_ALARM_CRIT_BIT 15
@@ -73,6 +75,9 @@
#define ONS_MANID 0x1b09 /* ON Semiconductor */
#define STM_MANID 0x104a /* ST Microelectronics */
+/* SMBUS register */
+#define SMBUS_STMOUT BIT(7) /* SMBus time-out, active low */
+
/* Supported chips */
/* Analog Devices */
@@ -476,6 +481,22 @@
data->extended = !!(cap & JC42_CAP_RANGE);
+ if (device_property_read_bool(dev, "smbus-timeout-disable")) {
+ int smbus;
+
+ /*
+ * Not all chips support this register, but from a
+ * quick read of various datasheets no chip appears
+ * incompatible with the below attempt to disable
+ * the timeout. And the whole thing is opt-in...
+ */
+ smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS);
+ if (smbus < 0)
+ return smbus;
+ i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS,
+ smbus | SMBUS_STMOUT);
+ }
+
config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG);
if (config < 0)
return config;
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index c1b9275..281491c 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -311,7 +311,7 @@
data->pwm[channel] = val << 8;
err = i2c_smbus_write_word_swapped(client,
MAX31790_REG_PWMOUT(channel),
- val);
+ data->pwm[channel]);
break;
case hwmon_pwm_enable:
fan_config = data->fan_config[channel];
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index ba59eae..d013acf 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -20,6 +20,7 @@
*/
#include <linux/kernel.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
@@ -476,8 +477,8 @@
static long pmbus_reg2data_direct(struct pmbus_data *data,
struct pmbus_sensor *sensor)
{
- long val = (s16) sensor->data;
- long m, b, R;
+ s64 b, val = (s16)sensor->data;
+ s32 m, R;
m = data->info->m[sensor->class];
b = data->info->b[sensor->class];
@@ -505,11 +506,12 @@
R--;
}
while (R < 0) {
- val = DIV_ROUND_CLOSEST(val, 10);
+ val = div_s64(val + 5LL, 10L); /* round closest */
R++;
}
- return (val - b) / m;
+ val = div_s64(val - b, m);
+ return clamp_val(val, LONG_MIN, LONG_MAX);
}
/*
@@ -629,7 +631,8 @@
static u16 pmbus_data2reg_direct(struct pmbus_data *data,
struct pmbus_sensor *sensor, long val)
{
- long m, b, R;
+ s64 b, val64 = val;
+ s32 m, R;
m = data->info->m[sensor->class];
b = data->info->b[sensor->class];
@@ -646,18 +649,18 @@
R -= 3; /* Adjust R and b for data in milli-units */
b *= 1000;
}
- val = val * m + b;
+ val64 = val64 * m + b;
while (R > 0) {
- val *= 10;
+ val64 *= 10;
R--;
}
while (R < 0) {
- val = DIV_ROUND_CLOSEST(val, 10);
+ val64 = div_s64(val64 + 5LL, 10L); /* round closest */
R++;
}
- return val;
+ return (u16)clamp_val(val64, S16_MIN, S16_MAX);
}
static u16 pmbus_data2reg_vid(struct pmbus_data *data,
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 2cd7c71..df63315 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -201,9 +201,23 @@
event_data = alloc_event_data(event_cpu);
if (!event_data)
return NULL;
-
INIT_WORK(&event_data->work, free_event_data);
+ /*
+ * In theory nothing prevent tracers in a trace session from being
+ * associated with different sinks, nor having a sink per tracer. But
+ * until we have HW with this kind of topology we need to assume tracers
+ * in a trace session are using the same sink. Therefore go through
+ * the coresight bus and pick the first enabled sink.
+ *
+ * When operated from sysFS users are responsible to enable the sink
+ * while from perf, the perf tools will do it based on the choice made
+ * on the cmd line. As such the "enable_sink" flag in sysFS is reset.
+ */
+ sink = coresight_get_enabled_sink(true);
+ if (!sink)
+ goto err;
+
mask = &event_data->mask;
/* Setup the path for each CPU in a trace session */
@@ -219,28 +233,15 @@
* list of devices from source to sink that can be
* referenced later when the path is actually needed.
*/
- event_data->path[cpu] = coresight_build_path(csdev);
+ event_data->path[cpu] = coresight_build_path(csdev, sink);
if (IS_ERR(event_data->path[cpu]))
goto err;
}
- /*
- * In theory nothing prevent tracers in a trace session from being
- * associated with different sinks, nor having a sink per tracer. But
- * until we have HW with this kind of topology and a way to convey
- * sink assignement from the perf cmd line we need to assume tracers
- * in a trace session are using the same sink. Therefore pick the sink
- * found at the end of the first available path.
- */
- cpu = cpumask_first(mask);
- /* Grab the sink at the end of the path */
- sink = coresight_get_sink(event_data->path[cpu]);
- if (!sink)
- goto err;
-
if (!sink_ops(sink)->alloc_buffer)
goto err;
+ cpu = cpumask_first(mask);
/* Get the AUX specific data from the sink buffer */
event_data->snk_config =
sink_ops(sink)->alloc_buffer(sink, cpu, pages,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 4db8d6a..83358eb 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -1026,7 +1026,8 @@
}
pm_runtime_put(&adev->dev);
- dev_info(dev, "%s initialized\n", (char *)id->data);
+ dev_info(dev, "CPU%d: %s initialized\n",
+ drvdata->cpu, (char *)id->data);
if (boot_enable) {
coresight_enable(drvdata->csdev);
@@ -1045,20 +1046,25 @@
}
static struct amba_id etm4_ids[] = {
- { /* ETM 4.0 - Cortex-A53 */
+ {
.id = 0x000bb95d,
.mask = 0x000fffff,
- .data = "ETM 4.0",
+ .data = "Cortex-A53 ETM v4.0",
},
- { /* ETM 4.0 - Cortex-A57 */
+ {
.id = 0x000bb95e,
.mask = 0x000fffff,
- .data = "ETM 4.0",
+ .data = "Cortex-A57 ETM v4.0",
},
- { /* ETM 4.0 - A72, Maia, HiSilicon */
+ {
.id = 0x000bb95a,
.mask = 0x000fffff,
- .data = "ETM 4.0",
+ .data = "Cortex-A72 ETM v4.0",
+ },
+ {
+ .id = 0x000bb959,
+ .mask = 0x000fffff,
+ .data = "Cortex-A73 ETM v4.0",
},
{ 0, 0},
};
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 196a14b..ef9d8e9 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -111,7 +111,9 @@
void coresight_disable_path(struct list_head *path);
int coresight_enable_path(struct list_head *path, u32 mode);
struct coresight_device *coresight_get_sink(struct list_head *path);
-struct list_head *coresight_build_path(struct coresight_device *csdev);
+struct coresight_device *coresight_get_enabled_sink(bool reset);
+struct list_head *coresight_build_path(struct coresight_device *csdev,
+ struct coresight_device *sink);
void coresight_release_path(struct list_head *path);
#ifdef CONFIG_CORESIGHT_SOURCE_ETM3X
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index d6941ea..1549436 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -70,7 +70,7 @@
* When operating in sysFS mode the content of the buffer needs to be
* read before the TMC is disabled.
*/
- if (local_read(&drvdata->mode) == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
tmc_etb_dump_hw(drvdata);
tmc_disable_hw(drvdata);
@@ -103,19 +103,14 @@
CS_LOCK(drvdata->base);
}
-static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev)
{
int ret = 0;
bool used = false;
char *buf = NULL;
- long val;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- /* This shouldn't be happening */
- if (WARN_ON(mode != CS_MODE_SYSFS))
- return -EINVAL;
-
/*
* If we don't have a buffer release the lock and allocate memory.
* Otherwise keep the lock and move along.
@@ -138,13 +133,12 @@
goto out;
}
- val = local_xchg(&drvdata->mode, mode);
/*
* In sysFS mode we can have multiple writers per sink. Since this
* sink is already enabled no memory is needed and the HW need not be
* touched.
*/
- if (val == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
goto out;
/*
@@ -163,6 +157,7 @@
drvdata->buf = buf;
}
+ drvdata->mode = CS_MODE_SYSFS;
tmc_etb_enable_hw(drvdata);
out:
spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -177,34 +172,29 @@
return ret;
}
-static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
{
int ret = 0;
- long val;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- /* This shouldn't be happening */
- if (WARN_ON(mode != CS_MODE_PERF))
- return -EINVAL;
-
spin_lock_irqsave(&drvdata->spinlock, flags);
if (drvdata->reading) {
ret = -EINVAL;
goto out;
}
- val = local_xchg(&drvdata->mode, mode);
/*
* In Perf mode there can be only one writer per sink. There
* is also no need to continue if the ETB/ETR is already operated
* from sysFS.
*/
- if (val != CS_MODE_DISABLED) {
+ if (drvdata->mode != CS_MODE_DISABLED) {
ret = -EINVAL;
goto out;
}
+ drvdata->mode = CS_MODE_PERF;
tmc_etb_enable_hw(drvdata);
out:
spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -216,9 +206,9 @@
{
switch (mode) {
case CS_MODE_SYSFS:
- return tmc_enable_etf_sink_sysfs(csdev, mode);
+ return tmc_enable_etf_sink_sysfs(csdev);
case CS_MODE_PERF:
- return tmc_enable_etf_sink_perf(csdev, mode);
+ return tmc_enable_etf_sink_perf(csdev);
}
/* We shouldn't be here */
@@ -227,7 +217,6 @@
static void tmc_disable_etf_sink(struct coresight_device *csdev)
{
- long val;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -237,10 +226,11 @@
return;
}
- val = local_xchg(&drvdata->mode, CS_MODE_DISABLED);
/* Disable the TMC only if it needs to */
- if (val != CS_MODE_DISABLED)
+ if (drvdata->mode != CS_MODE_DISABLED) {
tmc_etb_disable_hw(drvdata);
+ drvdata->mode = CS_MODE_DISABLED;
+ }
spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -260,7 +250,7 @@
}
tmc_etf_enable_hw(drvdata);
- local_set(&drvdata->mode, CS_MODE_SYSFS);
+ drvdata->mode = CS_MODE_SYSFS;
spin_unlock_irqrestore(&drvdata->spinlock, flags);
dev_info(drvdata->dev, "TMC-ETF enabled\n");
@@ -280,7 +270,7 @@
}
tmc_etf_disable_hw(drvdata);
- local_set(&drvdata->mode, CS_MODE_DISABLED);
+ drvdata->mode = CS_MODE_DISABLED;
spin_unlock_irqrestore(&drvdata->spinlock, flags);
dev_info(drvdata->dev, "TMC disabled\n");
@@ -383,7 +373,7 @@
return;
/* This shouldn't happen */
- if (WARN_ON_ONCE(local_read(&drvdata->mode) != CS_MODE_PERF))
+ if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
return;
CS_UNLOCK(drvdata->base);
@@ -504,7 +494,6 @@
int tmc_read_prepare_etb(struct tmc_drvdata *drvdata)
{
- long val;
enum tmc_mode mode;
int ret = 0;
unsigned long flags;
@@ -528,9 +517,8 @@
goto out;
}
- val = local_read(&drvdata->mode);
/* Don't interfere if operated from Perf */
- if (val == CS_MODE_PERF) {
+ if (drvdata->mode == CS_MODE_PERF) {
ret = -EINVAL;
goto out;
}
@@ -542,7 +530,7 @@
}
/* Disable the TMC if need be */
- if (val == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
tmc_etb_disable_hw(drvdata);
drvdata->reading = true;
@@ -573,7 +561,7 @@
}
/* Re-enable the TMC if need be */
- if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+ if (drvdata->mode == CS_MODE_SYSFS) {
/*
* The trace run will continue with the same allocated trace
* buffer. As such zero-out the buffer so that we don't end
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 886ea83..2db4857 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -15,11 +15,30 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/circ_buf.h>
#include <linux/coresight.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
#include "coresight-priv.h"
#include "coresight-tmc.h"
+/**
+ * struct cs_etr_buffer - keep track of a recording session' specifics
+ * @tmc: generic portion of the TMC buffers
+ * @paddr: the physical address of a DMA'able contiguous memory area
+ * @vaddr: the virtual address associated to @paddr
+ * @size: how much memory we have, starting at @paddr
+ * @dev: the device @vaddr has been tied to
+ */
+struct cs_etr_buffers {
+ struct cs_buffers tmc;
+ dma_addr_t paddr;
+ void __iomem *vaddr;
+ u32 size;
+ struct device *dev;
+};
+
static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
{
u32 axictl;
@@ -86,26 +105,22 @@
* When operating in sysFS mode the content of the buffer needs to be
* read before the TMC is disabled.
*/
- if (local_read(&drvdata->mode) == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
tmc_etr_dump_hw(drvdata);
tmc_disable_hw(drvdata);
CS_LOCK(drvdata->base);
}
-static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
{
int ret = 0;
bool used = false;
- long val;
unsigned long flags;
void __iomem *vaddr = NULL;
dma_addr_t paddr;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- /* This shouldn't be happening */
- if (WARN_ON(mode != CS_MODE_SYSFS))
- return -EINVAL;
/*
* If we don't have a buffer release the lock and allocate memory.
@@ -134,13 +149,12 @@
goto out;
}
- val = local_xchg(&drvdata->mode, mode);
/*
* In sysFS mode we can have multiple writers per sink. Since this
* sink is already enabled no memory is needed and the HW need not be
* touched.
*/
- if (val == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
goto out;
/*
@@ -155,8 +169,7 @@
drvdata->buf = drvdata->vaddr;
}
- memset(drvdata->vaddr, 0, drvdata->size);
-
+ drvdata->mode = CS_MODE_SYSFS;
tmc_etr_enable_hw(drvdata);
out:
spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -171,34 +184,29 @@
return ret;
}
-static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
{
int ret = 0;
- long val;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- /* This shouldn't be happening */
- if (WARN_ON(mode != CS_MODE_PERF))
- return -EINVAL;
-
spin_lock_irqsave(&drvdata->spinlock, flags);
if (drvdata->reading) {
ret = -EINVAL;
goto out;
}
- val = local_xchg(&drvdata->mode, mode);
/*
* In Perf mode there can be only one writer per sink. There
* is also no need to continue if the ETR is already operated
* from sysFS.
*/
- if (val != CS_MODE_DISABLED) {
+ if (drvdata->mode != CS_MODE_DISABLED) {
ret = -EINVAL;
goto out;
}
+ drvdata->mode = CS_MODE_PERF;
tmc_etr_enable_hw(drvdata);
out:
spin_unlock_irqrestore(&drvdata->spinlock, flags);
@@ -210,9 +218,9 @@
{
switch (mode) {
case CS_MODE_SYSFS:
- return tmc_enable_etr_sink_sysfs(csdev, mode);
+ return tmc_enable_etr_sink_sysfs(csdev);
case CS_MODE_PERF:
- return tmc_enable_etr_sink_perf(csdev, mode);
+ return tmc_enable_etr_sink_perf(csdev);
}
/* We shouldn't be here */
@@ -221,7 +229,6 @@
static void tmc_disable_etr_sink(struct coresight_device *csdev)
{
- long val;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -231,19 +238,244 @@
return;
}
- val = local_xchg(&drvdata->mode, CS_MODE_DISABLED);
/* Disable the TMC only if it needs to */
- if (val != CS_MODE_DISABLED)
+ if (drvdata->mode != CS_MODE_DISABLED) {
tmc_etr_disable_hw(drvdata);
+ drvdata->mode = CS_MODE_DISABLED;
+ }
spin_unlock_irqrestore(&drvdata->spinlock, flags);
dev_info(drvdata->dev, "TMC-ETR disabled\n");
}
+static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, int cpu,
+ void **pages, int nr_pages, bool overwrite)
+{
+ int node;
+ struct cs_etr_buffers *buf;
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+ if (cpu == -1)
+ cpu = smp_processor_id();
+ node = cpu_to_node(cpu);
+
+ /* Allocate memory structure for interaction with Perf */
+ buf = kzalloc_node(sizeof(struct cs_etr_buffers), GFP_KERNEL, node);
+ if (!buf)
+ return NULL;
+
+ buf->dev = drvdata->dev;
+ buf->size = drvdata->size;
+ buf->vaddr = dma_alloc_coherent(buf->dev, buf->size,
+ &buf->paddr, GFP_KERNEL);
+ if (!buf->vaddr) {
+ kfree(buf);
+ return NULL;
+ }
+
+ buf->tmc.snapshot = overwrite;
+ buf->tmc.nr_pages = nr_pages;
+ buf->tmc.data_pages = pages;
+
+ return buf;
+}
+
+static void tmc_free_etr_buffer(void *config)
+{
+ struct cs_etr_buffers *buf = config;
+
+ dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->paddr);
+ kfree(buf);
+}
+
+static int tmc_set_etr_buffer(struct coresight_device *csdev,
+ struct perf_output_handle *handle,
+ void *sink_config)
+{
+ int ret = 0;
+ unsigned long head;
+ struct cs_etr_buffers *buf = sink_config;
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+ /* wrap head around to the amount of space we have */
+ head = handle->head & ((buf->tmc.nr_pages << PAGE_SHIFT) - 1);
+
+ /* find the page to write to */
+ buf->tmc.cur = head / PAGE_SIZE;
+
+ /* and offset within that page */
+ buf->tmc.offset = head % PAGE_SIZE;
+
+ local_set(&buf->tmc.data_size, 0);
+
+ /* Tell the HW where to put the trace data */
+ drvdata->vaddr = buf->vaddr;
+ drvdata->paddr = buf->paddr;
+ memset(drvdata->vaddr, 0, drvdata->size);
+
+ return ret;
+}
+
+static unsigned long tmc_reset_etr_buffer(struct coresight_device *csdev,
+ struct perf_output_handle *handle,
+ void *sink_config, bool *lost)
+{
+ long size = 0;
+ struct cs_etr_buffers *buf = sink_config;
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+ if (buf) {
+ /*
+ * In snapshot mode ->data_size holds the new address of the
+ * ring buffer's head. The size itself is the whole address
+ * range since we want the latest information.
+ */
+ if (buf->tmc.snapshot) {
+ size = buf->tmc.nr_pages << PAGE_SHIFT;
+ handle->head = local_xchg(&buf->tmc.data_size, size);
+ }
+
+ /*
+ * Tell the tracer PMU how much we got in this run and if
+ * something went wrong along the way. Nobody else can use
+ * this cs_etr_buffers instance until we are done. As such
+ * resetting parameters here and squaring off with the ring
+ * buffer API in the tracer PMU is fine.
+ */
+ *lost = !!local_xchg(&buf->tmc.lost, 0);
+ size = local_xchg(&buf->tmc.data_size, 0);
+ }
+
+ /* Get ready for another run */
+ drvdata->vaddr = NULL;
+ drvdata->paddr = 0;
+
+ return size;
+}
+
+static void tmc_update_etr_buffer(struct coresight_device *csdev,
+ struct perf_output_handle *handle,
+ void *sink_config)
+{
+ int i, cur;
+ u32 *buf_ptr;
+ u32 read_ptr, write_ptr;
+ u32 status, to_read;
+ unsigned long offset;
+ struct cs_buffers *buf = sink_config;
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+ if (!buf)
+ return;
+
+ /* This shouldn't happen */
+ if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
+ return;
+
+ CS_UNLOCK(drvdata->base);
+
+ tmc_flush_and_stop(drvdata);
+
+ read_ptr = readl_relaxed(drvdata->base + TMC_RRP);
+ write_ptr = readl_relaxed(drvdata->base + TMC_RWP);
+
+ /*
+ * Get a hold of the status register and see if a wrap around
+ * has occurred. If so adjust things accordingly.
+ */
+ status = readl_relaxed(drvdata->base + TMC_STS);
+ if (status & TMC_STS_FULL) {
+ local_inc(&buf->lost);
+ to_read = drvdata->size;
+ } else {
+ to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
+ }
+
+ /*
+ * The TMC RAM buffer may be bigger than the space available in the
+ * perf ring buffer (handle->size). If so advance the RRP so that we
+ * get the latest trace data.
+ */
+ if (to_read > handle->size) {
+ u32 buffer_start, mask = 0;
+
+ /* Read buffer start address in system memory */
+ buffer_start = readl_relaxed(drvdata->base + TMC_DBALO);
+
+ /*
+ * The value written to RRP must be byte-address aligned to
+ * the width of the trace memory databus _and_ to a frame
+ * boundary (16 byte), whichever is the biggest. For example,
+ * for 32-bit, 64-bit and 128-bit wide trace memory, the four
+ * LSBs must be 0s. For 256-bit wide trace memory, the five
+ * LSBs must be 0s.
+ */
+ switch (drvdata->memwidth) {
+ case TMC_MEM_INTF_WIDTH_32BITS:
+ case TMC_MEM_INTF_WIDTH_64BITS:
+ case TMC_MEM_INTF_WIDTH_128BITS:
+ mask = GENMASK(31, 5);
+ break;
+ case TMC_MEM_INTF_WIDTH_256BITS:
+ mask = GENMASK(31, 6);
+ break;
+ }
+
+ /*
+ * Make sure the new size is aligned in accordance with the
+ * requirement explained above.
+ */
+ to_read = handle->size & mask;
+ /* Move the RAM read pointer up */
+ read_ptr = (write_ptr + drvdata->size) - to_read;
+ /* Make sure we are still within our limits */
+ if (read_ptr > (buffer_start + (drvdata->size - 1)))
+ read_ptr -= drvdata->size;
+ /* Tell the HW */
+ writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
+ local_inc(&buf->lost);
+ }
+
+ cur = buf->cur;
+ offset = buf->offset;
+
+ /* for every byte to read */
+ for (i = 0; i < to_read; i += 4) {
+ buf_ptr = buf->data_pages[cur] + offset;
+ *buf_ptr = readl_relaxed(drvdata->base + TMC_RRD);
+
+ offset += 4;
+ if (offset >= PAGE_SIZE) {
+ offset = 0;
+ cur++;
+ /* wrap around at the end of the buffer */
+ cur &= buf->nr_pages - 1;
+ }
+ }
+
+ /*
+ * In snapshot mode all we have to do is communicate to
+ * perf_aux_output_end() the address of the current head. In full
+ * trace mode the same function expects a size to move rb->aux_head
+ * forward.
+ */
+ if (buf->snapshot)
+ local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
+ else
+ local_add(to_read, &buf->data_size);
+
+ CS_LOCK(drvdata->base);
+}
+
static const struct coresight_ops_sink tmc_etr_sink_ops = {
.enable = tmc_enable_etr_sink,
.disable = tmc_disable_etr_sink,
+ .alloc_buffer = tmc_alloc_etr_buffer,
+ .free_buffer = tmc_free_etr_buffer,
+ .set_buffer = tmc_set_etr_buffer,
+ .reset_buffer = tmc_reset_etr_buffer,
+ .update_buffer = tmc_update_etr_buffer,
};
const struct coresight_ops tmc_etr_cs_ops = {
@@ -253,7 +485,6 @@
int tmc_read_prepare_etr(struct tmc_drvdata *drvdata)
{
int ret = 0;
- long val;
unsigned long flags;
/* config types are set a boot time and never change */
@@ -266,9 +497,8 @@
goto out;
}
- val = local_read(&drvdata->mode);
/* Don't interfere if operated from Perf */
- if (val == CS_MODE_PERF) {
+ if (drvdata->mode == CS_MODE_PERF) {
ret = -EINVAL;
goto out;
}
@@ -280,7 +510,7 @@
}
/* Disable the TMC if need be */
- if (val == CS_MODE_SYSFS)
+ if (drvdata->mode == CS_MODE_SYSFS)
tmc_etr_disable_hw(drvdata);
drvdata->reading = true;
@@ -303,7 +533,7 @@
spin_lock_irqsave(&drvdata->spinlock, flags);
/* RE-enable the TMC if need be */
- if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
+ if (drvdata->mode == CS_MODE_SYSFS) {
/*
* The trace run will continue with the same allocated trace
* buffer. The trace buffer is cleared in tmc_etr_enable_hw(),
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 44b3ae3..51c0185 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -117,7 +117,7 @@
void __iomem *vaddr;
u32 size;
u32 len;
- local_t mode;
+ u32 mode;
enum tmc_config_type config_type;
enum tmc_mem_intf_width memwidth;
u32 trigger_cntr;
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 0673baf..ff579a7 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -46,8 +46,11 @@
#define TPIU_ITATBCTR0 0xef8
/** register definition **/
+/* FFSR - 0x300 */
+#define FFSR_FT_STOPPED BIT(1)
/* FFCR - 0x304 */
#define FFCR_FON_MAN BIT(6)
+#define FFCR_STOP_FI BIT(12)
/**
* @base: memory mapped base address for this component.
@@ -85,10 +88,14 @@
{
CS_UNLOCK(drvdata->base);
- /* Clear formatter controle reg. */
- writel_relaxed(0x0, drvdata->base + TPIU_FFCR);
+ /* Clear formatter and stop on flush */
+ writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR);
/* Generate manual flush */
- writel_relaxed(FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+ writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+ /* Wait for flush to complete */
+ coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN, 0);
+ /* Wait for formatter to stop */
+ coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED, 1);
CS_LOCK(drvdata->base);
}
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 7bf00a0..0c37356 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -368,6 +368,52 @@
return csdev;
}
+static int coresight_enabled_sink(struct device *dev, void *data)
+{
+ bool *reset = data;
+ struct coresight_device *csdev = to_coresight_device(dev);
+
+ if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+ csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) &&
+ csdev->activated) {
+ /*
+ * Now that we have a handle on the sink for this session,
+ * disable the sysFS "enable_sink" flag so that possible
+ * concurrent perf session that wish to use another sink don't
+ * trip on it. Doing so has no ramification for the current
+ * session.
+ */
+ if (*reset)
+ csdev->activated = false;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * coresight_get_enabled_sink - returns the first enabled sink found on the bus
+ * @deactivate: Whether the 'enable_sink' flag should be reset
+ *
+ * When operated from perf the deactivate parameter should be set to 'true'.
+ * That way the "enabled_sink" flag of the sink that was selected can be reset,
+ * allowing for other concurrent perf sessions to choose a different sink.
+ *
+ * When operated from sysFS users have full control and as such the deactivate
+ * parameter should be set to 'false', hence mandating users to explicitly
+ * clear the flag.
+ */
+struct coresight_device *coresight_get_enabled_sink(bool deactivate)
+{
+ struct device *dev = NULL;
+
+ dev = bus_find_device(&coresight_bustype, NULL, &deactivate,
+ coresight_enabled_sink);
+
+ return dev ? to_coresight_device(dev) : NULL;
+}
+
/**
* _coresight_build_path - recursively build a path from a @csdev to a sink.
* @csdev: The device to start from.
@@ -380,6 +426,7 @@
* last one.
*/
static int _coresight_build_path(struct coresight_device *csdev,
+ struct coresight_device *sink,
struct list_head *path)
{
int i;
@@ -387,15 +434,15 @@
struct coresight_node *node;
/* An activated sink has been found. Enqueue the element */
- if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
- csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) && csdev->activated)
+ if (csdev == sink)
goto out;
/* Not a sink - recursively explore each port found on this element */
for (i = 0; i < csdev->nr_outport; i++) {
struct coresight_device *child_dev = csdev->conns[i].child_dev;
- if (child_dev && _coresight_build_path(child_dev, path) == 0) {
+ if (child_dev &&
+ _coresight_build_path(child_dev, sink, path) == 0) {
found = true;
break;
}
@@ -422,18 +469,22 @@
return 0;
}
-struct list_head *coresight_build_path(struct coresight_device *csdev)
+struct list_head *coresight_build_path(struct coresight_device *source,
+ struct coresight_device *sink)
{
struct list_head *path;
int rc;
+ if (!sink)
+ return ERR_PTR(-EINVAL);
+
path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
if (!path)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(path);
- rc = _coresight_build_path(csdev, path);
+ rc = _coresight_build_path(source, sink, path);
if (rc) {
kfree(path);
return ERR_PTR(rc);
@@ -497,6 +548,7 @@
int coresight_enable(struct coresight_device *csdev)
{
int cpu, ret = 0;
+ struct coresight_device *sink;
struct list_head *path;
mutex_lock(&coresight_mutex);
@@ -508,7 +560,17 @@
if (csdev->enable)
goto out;
- path = coresight_build_path(csdev);
+ /*
+ * Search for a valid sink for this session but don't reset the
+ * "enable_sink" flag in sysFS. Users get to do that explicitly.
+ */
+ sink = coresight_get_enabled_sink(false);
+ if (!sink) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ path = coresight_build_path(csdev, sink);
if (IS_ERR(path)) {
pr_err("building path(s) failed\n");
ret = PTR_ERR(path);
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 63b5db4..e0f3244 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -95,6 +95,11 @@
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6),
.driver_data = (kernel_ulong_t)0,
},
+ {
+ /* Gemini Lake */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e),
+ .driver_data = (kernel_ulong_t)0,
+ },
{ 0 },
};
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 6869712..45d6771 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -962,10 +962,6 @@
goto err_clk_dis;
}
- ret = i2c_add_adapter(&id->adap);
- if (ret < 0)
- goto err_clk_dis;
-
/*
* Cadence I2C controller has a bug wherein it generates
* invalid read transaction after HW timeout in master receiver mode.
@@ -975,6 +971,10 @@
*/
cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET);
+ ret = i2c_add_adapter(&id->adap);
+ if (ret < 0)
+ goto err_clk_dis;
+
dev_info(&pdev->dev, "%u kHz mmio %08lx irq %d\n",
id->i2c_clk / 1000, (unsigned long)r_mem->start, id->irq);
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index eb3627f..e6fe21a 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1592,6 +1592,9 @@
/* Default timeout in interrupt mode: 200 ms */
priv->adapter.timeout = HZ / 5;
+ if (dev->irq == IRQ_NOTCONNECTED)
+ priv->features &= ~FEATURE_IRQ;
+
if (priv->features & FEATURE_IRQ) {
u16 pcictl, pcists;
diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c
index 8f11d34..c811af4 100644
--- a/drivers/i2c/busses/i2c-riic.c
+++ b/drivers/i2c/busses/i2c-riic.c
@@ -218,8 +218,12 @@
}
if (riic->is_last || riic->err) {
- riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER);
+ riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER);
writeb(ICCR2_SP, riic->base + RIIC_ICCR2);
+ } else {
+ /* Transfer is complete, but do not send STOP */
+ riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER);
+ complete(&riic->msg_done);
}
return IRQ_HANDLED;
diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
index 472641f..af05e20 100644
--- a/drivers/iio/adc/ti-ads1015.c
+++ b/drivers/iio/adc/ti-ads1015.c
@@ -269,6 +269,7 @@
conv_time = DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr_old]);
conv_time += DIV_ROUND_UP(USEC_PER_SEC, data->data_rate[dr]);
+ conv_time += conv_time / 10; /* 10% internal clock inaccuracy */
usleep_range(conv_time, conv_time + 1);
data->conv_invalid = false;
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 809a028..a09d6ee 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1482,7 +1482,7 @@
return id_priv;
}
-static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
+static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
@@ -1877,7 +1877,8 @@
struct rdma_id_private *listen_id, *conn_id = NULL;
struct rdma_cm_event event;
struct net_device *net_dev;
- int offset, ret;
+ u8 offset;
+ int ret;
listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
if (IS_ERR(listen_id))
@@ -3309,7 +3310,8 @@
struct ib_cm_sidr_req_param req;
struct ib_cm_id *id;
void *private_data;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
@@ -3366,7 +3368,8 @@
struct rdma_route *route;
void *private_data;
struct ib_cm_id *id;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc7..ff12b8d 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -196,7 +196,7 @@
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- flush_work(&cq->work);
+ cancel_work_sync(&cq->work);
break;
default:
WARN_ON_ONCE(1);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 19c6477..a856371 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -575,10 +575,10 @@
ret = -EAGAIN;
goto skip_cqe;
}
- if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+ if (unlikely(!CQE_STATUS(hw_cqe) &&
+ CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
t4_set_wq_in_error(wq);
- hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
- goto proc_cqe;
+ hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
}
goto proc_cqe;
}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 862381a..b55adf5 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -171,7 +171,7 @@
__be32 msn;
} rcqe;
struct {
- u32 stag;
+ __be32 stag;
u16 nada2;
u16 cidx;
} scqe;
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 010c709..58c531d 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -675,8 +675,8 @@
__u16 wrid;
__u8 r1[3];
__u8 len16;
- __u32 r2;
- __u32 stag;
+ __be32 r2;
+ __be32 stag;
struct fw_ri_tpte tpte;
__u64 pbl[2];
};
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 24d0820..4682909 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -9769,7 +9769,7 @@
goto unimplemented;
case HFI1_IB_CFG_OP_VLS:
- val = ppd->vls_operational;
+ val = ppd->actual_vls_operational;
break;
case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 6fd043b..7db2001 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -159,6 +159,9 @@
return NOTIFY_DONE;
iwdev = &hdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
+
netdev = iwdev->ldev->netdev;
upper_dev = netdev_master_upper_dev_get(netdev);
if (netdev != event_netdev)
@@ -231,6 +234,9 @@
return NOTIFY_DONE;
iwdev = &hdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
+
netdev = iwdev->ldev->netdev;
if (netdev != event_netdev)
return NOTIFY_DONE;
@@ -280,6 +286,8 @@
if (!iwhdl)
return NOTIFY_DONE;
iwdev = &iwhdl->device;
+ if (iwdev->init_state < INET_NOTIFIER)
+ return NOTIFY_DONE;
p = (__be32 *)neigh->primary_key;
i40iw_copy_ip_ntohl(local_ipaddr, p);
if (neigh->nud_state & NUD_VALID) {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c224543..709d649 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1669,7 +1669,7 @@
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
else
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 13;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
pr_err("path MTU (%u) is invalid\n",
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 786f640..5e29fbd 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2514,6 +2514,8 @@
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
@@ -2573,6 +2575,18 @@
return ret;
}
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+{
+ switch (umr_fence_cap) {
+ case MLX5_CAP_UMR_FENCE_NONE:
+ return MLX5_FENCE_MODE_NONE;
+ case MLX5_CAP_UMR_FENCE_SMALL:
+ return MLX5_FENCE_MODE_INITIATOR_SMALL;
+ default:
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
+ }
+}
+
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
struct ib_srq_init_attr attr;
@@ -3099,6 +3113,8 @@
mlx5_ib_internal_fill_odp_caps(dev);
+ dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
+
if (MLX5_CAP_GEN(mdev, imaicl)) {
dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 86e1e081..d5cc954 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -345,7 +345,7 @@
struct mlx5_ib_wq rq;
u8 sq_signal_bits;
- u8 fm_cache;
+ u8 next_fence;
struct mlx5_ib_wq sq;
/* serialize qp state modifications
@@ -643,6 +643,7 @@
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
+ u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 2665414..fdd1561 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3755,24 +3755,6 @@
}
}
-static u8 get_fence(u8 fence, struct ib_send_wr *wr)
-{
- if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
- wr->send_flags & IB_SEND_FENCE))
- return MLX5_FENCE_MODE_STRONG_ORDERING;
-
- if (unlikely(fence)) {
- if (wr->send_flags & IB_SEND_FENCE)
- return MLX5_FENCE_MODE_SMALL_AND_FENCE;
- else
- return fence;
- } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
- return MLX5_FENCE_MODE_FENCE;
- }
-
- return 0;
-}
-
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
struct ib_send_wr *wr, unsigned *idx,
@@ -3801,8 +3783,7 @@
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u8 next_fence,
- u32 mlx5_opcode)
+ int nreq, u8 fence, u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -3810,7 +3791,6 @@
mlx5_opcode | ((u32)opmod << 24));
ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
- qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
ctrl->signature = wq_sig(ctrl);
@@ -3870,7 +3850,6 @@
goto out;
}
- fence = qp->fm_cache;
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
@@ -3887,6 +3866,19 @@
goto out;
}
+ if (wr->opcode == IB_WR_LOCAL_INV ||
+ wr->opcode == IB_WR_REG_MR) {
+ fence = dev->umr_fence;
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ } else if (wr->send_flags & IB_SEND_FENCE) {
+ if (qp->next_fence)
+ fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+ else
+ fence = MLX5_FENCE_MODE_FENCE;
+ } else {
+ fence = qp->next_fence;
+ }
+
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
@@ -3913,7 +3905,6 @@
goto out;
case IB_WR_LOCAL_INV:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
set_linv_wr(qp, &seg, &size);
@@ -3921,7 +3912,6 @@
break;
case IB_WR_REG_MR:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
@@ -3944,9 +3934,8 @@
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_UMR);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
@@ -3971,9 +3960,8 @@
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_SET_PSV);
err = begin_wqe(qp, &seg, &ctrl, wr,
&idx, &size, nreq);
if (err) {
@@ -3983,7 +3971,6 @@
goto out;
}
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
mr->sig->psv_wire.psv_idx, &seg,
&size);
@@ -3993,9 +3980,9 @@
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ fence, MLX5_OPCODE_SET_PSV);
+ qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -4100,8 +4087,8 @@
}
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- get_fence(fence, wr), next_fence,
+ qp->next_fence = next_fence;
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index 63890eb..eccf703 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -404,9 +404,9 @@
}
if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
- else
packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+ else
+ packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
packet->roce_mode = roce_mode;
memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 4ba019e..35d5b89 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1653,7 +1653,7 @@
int status = 0;
if (new_state == qp->state)
- return 1;
+ return 0;
switch (qp->state) {
case QED_ROCE_QP_STATE_RESET:
diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index e202b81..6b712ee 100644
--- a/drivers/infiniband/sw/rdmavt/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -170,9 +170,9 @@
spin_lock_irq(&rdi->mmap_offset_lock);
if (rdi->mmap_offset == 0)
- rdi->mmap_offset = PAGE_SIZE;
+ rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
ip->offset = rdi->mmap_offset;
- rdi->mmap_offset += size;
+ rdi->mmap_offset += ALIGN(size, SHMLBA);
spin_unlock_irq(&rdi->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index c572a4c..bd812e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -156,10 +156,10 @@
spin_lock_bh(&rxe->mmap_offset_lock);
if (rxe->mmap_offset == 0)
- rxe->mmap_offset = PAGE_SIZE;
+ rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA);
ip->info.offset = rxe->mmap_offset;
- rxe->mmap_offset += size;
+ rxe->mmap_offset += ALIGN(size, SHMLBA);
spin_unlock_bh(&rxe->mmap_offset_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index ee26a1b..1c4e5b2 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -412,6 +412,8 @@
elem = kmem_cache_zalloc(pool_cache(pool),
(pool->flags & RXE_POOL_ATOMIC) ?
GFP_ATOMIC : GFP_KERNEL);
+ if (!elem)
+ return NULL;
elem->pool = pool;
kref_init(&elem->ref_cnt);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 9d08478..5b0ca35 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -726,11 +726,11 @@
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
- kfree_skb(skb);
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (ret == -EAGAIN) {
+ kfree_skb(skb);
rxe_run_task(&qp->req.task, 1);
goto exit;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 7705820..8c0ddd7 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -799,18 +799,17 @@
/* Unreachable */
WARN_ON(1);
- /* We successfully processed this new request. */
- qp->resp.msn++;
-
/* next expected psn, read handles this separately */
qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
qp->resp.opcode = pkt->opcode;
qp->resp.status = IB_WC_SUCCESS;
- if (pkt->mask & RXE_COMP_MASK)
+ if (pkt->mask & RXE_COMP_MASK) {
+ /* We successfully processed this new request. */
+ qp->resp.msn++;
return RESPST_COMPLETE;
- else if (qp_type(qp) == IB_QPT_RC)
+ } else if (qp_type(qp) == IB_QPT_RC)
return RESPST_ACKNOWLEDGE;
else
return RESPST_CLEANUP;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 830fecb..335bd2c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1177,10 +1177,15 @@
ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
+ rtnl_lock();
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev);
- if (ipoib_ib_dev_open(dev) != 0)
+
+ result = ipoib_ib_dev_open(dev);
+ rtnl_unlock();
+ if (result)
return;
+
if (netif_queue_stopped(dev))
netif_start_queue(dev);
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 0be6a7c..cb48e22 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -430,6 +430,7 @@
struct list_head list;
struct iser_reg_resources rsc;
struct iser_pi_context *pi_ctx;
+ struct list_head all_list;
};
/**
@@ -443,6 +444,7 @@
struct list_head list;
spinlock_t lock;
int size;
+ struct list_head all_list;
};
/**
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a4b791d..bc6f5bb 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -362,6 +362,7 @@
int i, ret;
INIT_LIST_HEAD(&fr_pool->list);
+ INIT_LIST_HEAD(&fr_pool->all_list);
spin_lock_init(&fr_pool->lock);
fr_pool->size = 0;
for (i = 0; i < cmds_max; i++) {
@@ -373,6 +374,7 @@
}
list_add_tail(&desc->list, &fr_pool->list);
+ list_add_tail(&desc->all_list, &fr_pool->all_list);
fr_pool->size++;
}
@@ -392,13 +394,13 @@
struct iser_fr_desc *desc, *tmp;
int i = 0;
- if (list_empty(&fr_pool->list))
+ if (list_empty(&fr_pool->all_list))
return;
iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) {
- list_del(&desc->list);
+ list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
+ list_del(&desc->all_list);
iser_free_reg_res(&desc->rsc);
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 39d2837..0983470 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -747,6 +747,7 @@
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ ib_drain_qp(isert_conn->qp);
list_del_init(&isert_conn->node);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index b974897..29ab814 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -992,8 +992,7 @@
return -ENOMEM;
attr->qp_state = IB_QPS_INIT;
- attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE;
+ attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
attr->port_num = ch->sport->port;
attr->pkey_index = 0;
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index caa5a62..15929d8 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -178,12 +178,14 @@
twl4030_vibra_suspend, twl4030_vibra_resume);
static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata,
- struct device_node *node)
+ struct device_node *parent)
{
+ struct device_node *node;
+
if (pdata && pdata->coexist)
return true;
- node = of_find_node_by_name(node, "codec");
+ node = of_get_child_by_name(parent, "codec");
if (node) {
of_node_put(node);
return true;
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index 5690eb7..15e0d35 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -248,8 +248,7 @@
int vddvibr_uV = 0;
int error;
- of_node_get(twl6040_core_dev->of_node);
- twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
+ twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node,
"vibra");
if (!twl6040_core_node) {
dev_err(&pdev->dev, "parent of node is missing?\n");
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index f26807c..af83d2e 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1247,29 +1247,32 @@
case SS4_PACKET_ID_MULTI:
if (priv->flags & ALPS_BUTTONPAD) {
if (IS_SS4PLUS_DEV(priv->dev_id)) {
- f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
- f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+ no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL;
} else {
f->mt[2].x = SS4_BTL_MF_X_V2(p, 0);
f->mt[3].x = SS4_BTL_MF_X_V2(p, 1);
+ no_data_x = SS4_MFPACKET_NO_AX_BL;
}
+ no_data_y = SS4_MFPACKET_NO_AY_BL;
f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0);
f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1);
- no_data_x = SS4_MFPACKET_NO_AX_BL;
- no_data_y = SS4_MFPACKET_NO_AY_BL;
} else {
if (IS_SS4PLUS_DEV(priv->dev_id)) {
- f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0);
- f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+ no_data_x = SS4_PLUS_MFPACKET_NO_AX;
} else {
- f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
- f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_STD_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_STD_MF_X_V2(p, 1);
+ no_data_x = SS4_MFPACKET_NO_AX;
}
+ no_data_y = SS4_MFPACKET_NO_AY;
+
f->mt[2].y = SS4_STD_MF_Y_V2(p, 0);
f->mt[3].y = SS4_STD_MF_Y_V2(p, 1);
- no_data_x = SS4_MFPACKET_NO_AX;
- no_data_y = SS4_MFPACKET_NO_AY;
}
f->first_mp = 0;
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index 7931237..9bc2bab 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -120,10 +120,12 @@
#define SS4_IS_5F_DETECTED(_b) ((_b[2] & 0x10) == 0x10)
-#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */
-#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */
-#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */
-#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */
+#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */
+#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */
+#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */
+#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */
/*
* enum V7_PACKET_ID - defines the packet type for V7
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index cd834da..59603a5 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1609,7 +1609,7 @@
case 5:
etd->hw_version = 3;
break;
- case 6 ... 14:
+ case 6 ... 15:
etd->hw_version = 4;
break;
default:
diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index 7e2dc5e..0b49f29 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -383,6 +383,9 @@
if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) {
psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n");
button_info = 0x33;
+ } else if (!button_info) {
+ psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n");
+ button_info = 0x33;
}
psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index dbf0983..d1051e3 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -520,6 +520,13 @@
DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"),
},
},
+ {
+ /* TUXEDO BU1406 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Notebook"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"),
+ },
+ },
{ }
};
diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 251ff2a..7a0dbce 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -126,7 +126,7 @@
int data, n, ret;
if (!np)
return -ENODEV;
- np = of_find_node_by_name(np, "touch");
+ np = of_get_child_by_name(np, "touch");
if (!np) {
dev_err(&pdev->dev, "Can't find touch node\n");
return -EINVAL;
@@ -144,13 +144,13 @@
if (data) {
ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set tsi prebias time */
if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) {
ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set prebias & prechg time of pen detect */
data = 0;
@@ -161,10 +161,18 @@
if (data) {
ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x);
+
+ of_node_put(np);
+
return 0;
+
+err_put_node:
+ of_node_put(np);
+
+ return -EINVAL;
}
#else
#define pm860x_touch_dt_init(x, y, z) (-1)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 1a0b110..0c910a8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3211,7 +3211,7 @@
unsigned long start, end;
start = IOVA_PFN(region->start);
- end = IOVA_PFN(region->start + region->length);
+ end = IOVA_PFN(region->start + region->length - 1);
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
}
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d3d975a..7f294f7 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1547,13 +1547,15 @@
domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1UL << ias) - 1;
domain->geometry.force_aperture = true;
- smmu_domain->pgtbl_ops = pgtbl_ops;
ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
- if (ret < 0)
+ if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
+ return ret;
+ }
- return ret;
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ return 0;
}
static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1580,7 +1582,7 @@
static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
{
- int i;
+ int i, j;
struct arm_smmu_master_data *master = fwspec->iommu_priv;
struct arm_smmu_device *smmu = master->smmu;
@@ -1588,6 +1590,13 @@
u32 sid = fwspec->ids[i];
__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
+ /* Bridged PCI devices may end up with duplicated IDs */
+ for (j = 0; j < i; j++)
+ if (fwspec->ids[j] == sid)
+ break;
+ if (j < i)
+ continue;
+
arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
}
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c7820b3..beef59e 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -543,7 +543,10 @@
if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) {
clk_enable(data->clk_master);
if (sysmmu_block(data)) {
- __sysmmu_tlb_invalidate_entry(data, iova, 1);
+ if (data->version >= MAKE_MMU_VER(5, 0))
+ __sysmmu_tlb_invalidate(data);
+ else
+ __sysmmu_tlb_invalidate_entry(data, iova, 1);
sysmmu_unblock(data);
}
clk_disable(data->clk_master);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 002f8a4..88bbc8c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2245,10 +2245,12 @@
uint64_t tmp;
if (!sg_res) {
+ unsigned int pgoff = sg->offset & ~PAGE_MASK;
+
sg_res = aligned_nrpages(sg->offset, sg->length);
- sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
+ sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = page_to_phys(sg_page(sg)) | prot;
+ pteval = (sg_phys(sg) - pgoff) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -3894,7 +3896,7 @@
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
+ sg->dma_address = sg_phys(sg);
sg->dma_length = sg->length;
}
return nelems;
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index f50e51c..d68a552 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -418,8 +418,12 @@
pte |= ARM_V7S_ATTR_NS_TABLE;
__arm_v7s_set_pte(ptep, pte, 1, cfg);
- } else {
+ } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
cptep = iopte_deref(pte, lvl);
+ } else {
+ /* We require an unmap first */
+ WARN_ON(!selftest_running);
+ return -EEXIST;
}
/* Rinse, repeat */
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index b8aeb07..68c6050 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -703,7 +703,7 @@
.probe = mtk_iommu_probe,
.remove = mtk_iommu_remove,
.driver = {
- .name = "mtk-iommu",
+ .name = "mtk-iommu-v1",
.of_match_table = mtk_iommu_of_ids,
.pm = &mtk_iommu_pm_ops,
}
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index bc0af33..910cb5e 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -258,6 +258,7 @@
config MVEBU_ODMI
bool
+ select GENERIC_MSI_IRQ_DOMAIN
config MVEBU_PIC
bool
diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c
index 05bbf17..1070b7b 100644
--- a/drivers/irqchip/irq-crossbar.c
+++ b/drivers/irqchip/irq-crossbar.c
@@ -199,7 +199,7 @@
static int __init crossbar_of_init(struct device_node *node)
{
int i, size, reserved = 0;
- u32 max = 0, entry;
+ u32 max = 0, entry, reg_size;
const __be32 *irqsr;
int ret = -ENOMEM;
@@ -276,9 +276,9 @@
if (!cb->register_offsets)
goto err_irq_map;
- of_property_read_u32(node, "ti,reg-size", &size);
+ of_property_read_u32(node, "ti,reg-size", ®_size);
- switch (size) {
+ switch (reg_size) {
case 1:
cb->write = crossbar_writeb;
break;
@@ -304,7 +304,7 @@
continue;
cb->register_offsets[i] = reserved;
- reserved += size;
+ reserved += reg_size;
}
of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 823f698..dd7e38a 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -1032,6 +1032,7 @@
sizeof(avmb1_carddef))))
return -EFAULT;
cdef.cardtype = AVM_CARDTYPE_B1;
+ cdef.cardnr = 0;
} else {
if ((retval = copy_from_user(&cdef, data,
sizeof(avmb1_extcarddef))))
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 3fba31c..537903b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -477,7 +477,7 @@
if (b == -1)
goto err;
- k->ptr[i] = PTR(ca->buckets[b].gen,
+ k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
bucket_to_sector(c, b),
ca->sb.nr_this_dev);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 2efdce0..cac297f 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -803,7 +803,10 @@
c->shrink.scan_objects = bch_mca_scan;
c->shrink.seeks = 4;
c->shrink.batch = c->btree_pages * 2;
- register_shrinker(&c->shrink);
+
+ if (register_shrinker(&c->shrink))
+ pr_warn("bcache: %s: could not register shrinker",
+ __func__);
return 0;
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 243de0bf..4bf1518 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -584,7 +584,7 @@
return false;
for (i = 0; i < KEY_PTRS(l); i++)
- if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+ if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
return false;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 6925023..08f20b7 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -508,7 +508,7 @@
continue;
ja->cur_idx = next;
- k->ptr[n++] = PTR(0,
+ k->ptr[n++] = MAKE_PTR(0,
bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
ca->sb.nr_this_dev);
}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e0f1c6d..edb8d1a 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -468,6 +468,7 @@
unsigned recoverable:1;
unsigned write:1;
unsigned read_dirty_data:1;
+ unsigned cache_missed:1;
unsigned long start_time;
@@ -653,6 +654,7 @@
s->orig_bio = bio;
s->cache_miss = NULL;
+ s->cache_missed = 0;
s->d = d;
s->recoverable = 1;
s->write = op_is_write(bio_op(bio));
@@ -703,7 +705,14 @@
struct search *s = container_of(cl, struct search, cl);
struct bio *bio = &s->bio.bio;
- if (s->recoverable) {
+ /*
+ * If read request hit dirty data (s->read_dirty_data is true),
+ * then recovery a failed read request from cached device may
+ * get a stale data back. So read failure recovery is only
+ * permitted when read request hit clean data in cache device,
+ * or when cache read race happened.
+ */
+ if (s->recoverable && !s->read_dirty_data) {
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
@@ -764,7 +773,7 @@
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
bch_mark_cache_accounting(s->iop.c, s->d,
- !s->cache_miss, s->iop.bypass);
+ !s->cache_missed, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
if (s->iop.error)
@@ -783,6 +792,8 @@
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
+ s->cache_missed = 1;
+
if (s->cache_miss || s->iop.bypass) {
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f4557f5..28ce342 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -2091,6 +2091,7 @@
if (bcache_major)
unregister_blkdev(bcache_major, "bcache");
unregister_reboot_notifier(&reboot);
+ mutex_destroy(&bch_register_lock);
}
static int __init bcache_init(void)
@@ -2109,14 +2110,15 @@
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
unregister_reboot_notifier(&reboot);
+ mutex_destroy(&bch_register_lock);
return bcache_major;
}
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
- sysfs_create_files(bcache_kobj, files) ||
bch_request_init() ||
- bch_debug_init(bcache_kobj))
+ bch_debug_init(bcache_kobj) ||
+ sysfs_create_files(bcache_kobj, files))
goto err;
return 0;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index fb02c39..f7ff408 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2084,6 +2084,7 @@
for (k = 0; k < page; k++) {
kfree(new_bp[k].map);
}
+ kfree(new_bp);
/* restore some fields from old_counts */
bitmap->counts.bp = old_counts.bp;
@@ -2134,6 +2135,14 @@
block += old_blocks;
}
+ if (bitmap->counts.bp != old_counts.bp) {
+ unsigned long k;
+ for (k = 0; k < old_counts.pages; k++)
+ if (!old_counts.bp[k].hijacked)
+ kfree(old_counts.bp[k].map);
+ kfree(old_counts.bp);
+ }
+
if (!init) {
int i;
while (block < (chunks << chunkshift)) {
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 7643f72..3ec647e 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1554,7 +1554,8 @@
int l;
struct dm_buffer *b, *tmp;
unsigned long freed = 0;
- unsigned long count = nr_to_scan;
+ unsigned long count = c->n_buffers[LIST_CLEAN] +
+ c->n_buffers[LIST_DIRTY];
unsigned long retain_target = get_retain_buffers(c);
for (l = 0; l < LIST_SIZE; l++) {
@@ -1591,6 +1592,7 @@
{
struct dm_bufio_client *c;
unsigned long count;
+ unsigned long retain_target;
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
@@ -1599,8 +1601,9 @@
return 0;
count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ retain_target = get_retain_buffers(c);
dm_bufio_unlock(c);
- return count;
+ return (count < retain_target) ? 0 : (count - retain_target);
}
/*
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 4477bf9..e976f4f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -81,10 +81,14 @@
#define SECTOR_TO_BLOCK_SHIFT 3
/*
+ * For btree insert:
* 3 for btree insert +
* 2 for btree lookup used within space map
+ * For btree remove:
+ * 2 for shadow spine +
+ * 4 for rebalance 3 child node
*/
-#define THIN_MAX_CONCURRENT_LOCKS 5
+#define THIN_MAX_CONCURRENT_LOCKS 6
/* This should be plenty */
#define SPACE_MAP_ROOT_SIZE 128
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 2b13117..ba7edcd 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -974,6 +974,7 @@
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
+ kfree(cinfo);
return 0;
}
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 7a75b50..e4ececd 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -678,23 +678,8 @@
pn->keys[1] = rn->keys[0];
memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
- /*
- * rejig the spine. This is ugly, since it knows too
- * much about the spine
- */
- if (s->nodes[0] != new_parent) {
- unlock_block(s->info, s->nodes[0]);
- s->nodes[0] = new_parent;
- }
- if (key < le64_to_cpu(rn->keys[0])) {
- unlock_block(s->info, right);
- s->nodes[1] = left;
- } else {
- unlock_block(s->info, left);
- s->nodes[1] = right;
- }
- s->count = 2;
-
+ unlock_block(s->info, left);
+ unlock_block(s->info, right);
return 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7aea022..475a7a1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1689,8 +1689,11 @@
struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx || i == qd_idx) {
- if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
+ if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) {
set_bit(R5_UPTODATE, &dev->flags);
+ if (test_bit(STRIPE_EXPAND_READY, &sh->state))
+ set_bit(R5_Expanded, &dev->flags);
+ }
if (fua)
set_bit(R5_WantFUA, &dev->flags);
if (sync)
diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c
index ad304ee..c61227c 100644
--- a/drivers/media/dvb-frontends/ascot2e.c
+++ b/drivers/media/dvb-frontends/ascot2e.c
@@ -155,7 +155,9 @@
static int ascot2e_write_reg(struct ascot2e_priv *priv, u8 reg, u8 val)
{
- return ascot2e_write_regs(priv, reg, &val, 1);
+ u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return ascot2e_write_regs(priv, reg, &tmp, 1);
}
static int ascot2e_read_regs(struct ascot2e_priv *priv,
diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c
index fd0f25e..b97647c 100644
--- a/drivers/media/dvb-frontends/cxd2841er.c
+++ b/drivers/media/dvb-frontends/cxd2841er.c
@@ -261,7 +261,9 @@
static int cxd2841er_write_reg(struct cxd2841er_priv *priv,
u8 addr, u8 reg, u8 val)
{
- return cxd2841er_write_regs(priv, addr, reg, &val, 1);
+ u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return cxd2841er_write_regs(priv, addr, reg, &tmp, 1);
}
static int cxd2841er_read_regs(struct cxd2841er_priv *priv,
diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c
index dc43c5f..e06bcd4 100644
--- a/drivers/media/dvb-frontends/helene.c
+++ b/drivers/media/dvb-frontends/helene.c
@@ -331,7 +331,9 @@
static int helene_write_reg(struct helene_priv *priv, u8 reg, u8 val)
{
- return helene_write_regs(priv, reg, &val, 1);
+ u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return helene_write_regs(priv, reg, &tmp, 1);
}
static int helene_read_regs(struct helene_priv *priv,
diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c
index 0c089b5..4ebddc8 100644
--- a/drivers/media/dvb-frontends/horus3a.c
+++ b/drivers/media/dvb-frontends/horus3a.c
@@ -89,7 +89,9 @@
static int horus3a_write_reg(struct horus3a_priv *priv, u8 reg, u8 val)
{
- return horus3a_write_regs(priv, reg, &val, 1);
+ u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return horus3a_write_regs(priv, reg, &tmp, 1);
}
static int horus3a_enter_power_save(struct horus3a_priv *priv)
diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c
index cadcae4..ac9d259 100644
--- a/drivers/media/dvb-frontends/itd1000.c
+++ b/drivers/media/dvb-frontends/itd1000.c
@@ -99,8 +99,9 @@
static inline int itd1000_write_reg(struct itd1000_state *state, u8 r, u8 v)
{
- int ret = itd1000_write_regs(state, r, &v, 1);
- state->shadow[r] = v;
+ u8 tmp = v; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+ int ret = itd1000_write_regs(state, r, &tmp, 1);
+ state->shadow[r] = tmp;
return ret;
}
diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c
index fc08429..7824926 100644
--- a/drivers/media/dvb-frontends/mt312.c
+++ b/drivers/media/dvb-frontends/mt312.c
@@ -142,7 +142,10 @@
static inline int mt312_writereg(struct mt312_state *state,
const enum mt312_reg_addr reg, const u8 val)
{
- return mt312_write(state, reg, &val, 1);
+ u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+
+ return mt312_write(state, reg, &tmp, 1);
}
static inline u32 mt312_div(u32 a, u32 b)
diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c
index 3d171b0..3deddbc 100644
--- a/drivers/media/dvb-frontends/stb0899_drv.c
+++ b/drivers/media/dvb-frontends/stb0899_drv.c
@@ -552,7 +552,8 @@
int stb0899_write_reg(struct stb0899_state *state, unsigned int reg, u8 data)
{
- return stb0899_write_regs(state, reg, &data, 1);
+ u8 tmp = data;
+ return stb0899_write_regs(state, reg, &tmp, 1);
}
/*
diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c
index 5add118..4746b1e 100644
--- a/drivers/media/dvb-frontends/stb6100.c
+++ b/drivers/media/dvb-frontends/stb6100.c
@@ -226,12 +226,14 @@
static int stb6100_write_reg(struct stb6100_state *state, u8 reg, u8 data)
{
+ u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
if (unlikely(reg >= STB6100_NUMREGS)) {
dprintk(verbose, FE_ERROR, 1, "Invalid register offset 0x%x", reg);
return -EREMOTEIO;
}
- data = (data & stb6100_template[reg].mask) | stb6100_template[reg].set;
- return stb6100_write_reg_range(state, &data, reg, 1);
+ tmp = (tmp & stb6100_template[reg].mask) | stb6100_template[reg].set;
+ return stb6100_write_reg_range(state, &tmp, reg, 1);
}
diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c
index abc379a..94cec81 100644
--- a/drivers/media/dvb-frontends/stv0367.c
+++ b/drivers/media/dvb-frontends/stv0367.c
@@ -804,7 +804,9 @@
static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data)
{
- return stv0367_writeregs(state, reg, &data, 1);
+ u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return stv0367_writeregs(state, reg, &tmp, 1);
}
static u8 stv0367_readreg(struct stv0367_state *state, u16 reg)
diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c
index 25bdf6e..f0377e2 100644
--- a/drivers/media/dvb-frontends/stv090x.c
+++ b/drivers/media/dvb-frontends/stv090x.c
@@ -761,7 +761,9 @@
static int stv090x_write_reg(struct stv090x_state *state, unsigned int reg, u8 data)
{
- return stv090x_write_regs(state, reg, &data, 1);
+ u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return stv090x_write_regs(state, reg, &tmp, 1);
}
static int stv090x_i2c_gate_ctrl(struct stv090x_state *state, int enable)
diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c
index c611ad2..924f16f 100644
--- a/drivers/media/dvb-frontends/stv6110x.c
+++ b/drivers/media/dvb-frontends/stv6110x.c
@@ -97,7 +97,9 @@
static int stv6110x_write_reg(struct stv6110x_state *stv6110x, u8 reg, u8 data)
{
- return stv6110x_write_regs(stv6110x, reg, &data, 1);
+ u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return stv6110x_write_regs(stv6110x, reg, &tmp, 1);
}
static int stv6110x_init(struct dvb_frontend *fe)
diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c
index a9f6bbe..103b9c8 100644
--- a/drivers/media/dvb-frontends/ts2020.c
+++ b/drivers/media/dvb-frontends/ts2020.c
@@ -369,7 +369,7 @@
gain2 = clamp_t(long, gain2, 0, 13);
v_agc = clamp_t(long, v_agc, 400, 1100);
- *_gain = -(gain1 * 2330 +
+ *_gain = -((__s64)gain1 * 2330 +
gain2 * 3500 +
v_agc * 24 / 10 * 10 +
10000);
@@ -387,7 +387,7 @@
gain3 = clamp_t(long, gain3, 0, 6);
v_agc = clamp_t(long, v_agc, 600, 1600);
- *_gain = -(gain1 * 2650 +
+ *_gain = -((__s64)gain1 * 2650 +
gain2 * 3380 +
gain3 * 2850 +
v_agc * 176 / 100 * 10 -
diff --git a/drivers/media/dvb-frontends/zl10039.c b/drivers/media/dvb-frontends/zl10039.c
index f8c271b..0d2bef6 100644
--- a/drivers/media/dvb-frontends/zl10039.c
+++ b/drivers/media/dvb-frontends/zl10039.c
@@ -138,7 +138,9 @@
const enum zl10039_reg_addr reg,
const u8 val)
{
- return zl10039_write(state, reg, &val, 1);
+ const u8 tmp = val; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
+
+ return zl10039_write(state, reg, &tmp, 1);
}
static int zl10039_init(struct dvb_frontend *fe)
diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c
index f77252d..d29c248 100644
--- a/drivers/media/platform/soc_camera/soc_scale_crop.c
+++ b/drivers/media/platform/soc_camera/soc_scale_crop.c
@@ -418,3 +418,7 @@
mf->height = soc_camera_shift_scale(rect->height, shift, scale_v);
}
EXPORT_SYMBOL(soc_camera_calc_client_output);
+
+MODULE_DESCRIPTION("soc-camera scaling-cropping functions");
+MODULE_AUTHOR("Guennadi Liakhovetski <kernel@pengutronix.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index 6ebe895..f4509ef 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -446,6 +446,8 @@
return -ERESTARTSYS;
ir = irctls[iminor(inode)];
+ mutex_unlock(&lirc_dev_lock);
+
if (!ir) {
retval = -ENODEV;
goto error;
@@ -486,8 +488,6 @@
}
error:
- mutex_unlock(&lirc_dev_lock);
-
nonseekable_open(inode, file);
return retval;
diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c
index 0e8fb89..5c4aa24 100644
--- a/drivers/media/usb/dvb-usb-v2/lmedm04.c
+++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c
@@ -504,18 +504,23 @@
static int lme2510_return_status(struct dvb_usb_device *d)
{
- int ret = 0;
+ int ret;
u8 *data;
- data = kzalloc(10, GFP_KERNEL);
+ data = kzalloc(6, GFP_KERNEL);
if (!data)
return -ENOMEM;
- ret |= usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0),
- 0x06, 0x80, 0x0302, 0x00, data, 0x0006, 200);
- info("Firmware Status: %x (%x)", ret , data[2]);
+ ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0),
+ 0x06, 0x80, 0x0302, 0x00,
+ data, 0x6, 200);
+ if (ret != 6)
+ ret = -EINVAL;
+ else
+ ret = data[2];
- ret = (ret < 0) ? -ENODEV : data[2];
+ info("Firmware Status: %6ph", data);
+
kfree(data);
return ret;
}
@@ -1079,8 +1084,6 @@
if (adap->fe[0]) {
info("FE Found M88RS2000");
- dvb_attach(ts2020_attach, adap->fe[0], &ts2020_config,
- &d->i2c_adap);
st->i2c_tuner_gate_w = 5;
st->i2c_tuner_gate_r = 5;
st->i2c_tuner_addr = 0x60;
@@ -1146,17 +1149,18 @@
ret = st->tuner_config;
break;
case TUNER_RS2000:
- ret = st->tuner_config;
+ if (dvb_attach(ts2020_attach, adap->fe[0],
+ &ts2020_config, &d->i2c_adap))
+ ret = st->tuner_config;
break;
default:
break;
}
- if (ret)
+ if (ret) {
info("TUN Found %s tuner", tun_msg[ret]);
- else {
- info("TUN No tuner found --- resetting device");
- lme_coldreset(d);
+ } else {
+ info("TUN No tuner found");
return -ENODEV;
}
@@ -1200,6 +1204,7 @@
static int lme2510_identify_state(struct dvb_usb_device *d, const char **name)
{
struct lme2510_state *st = d->priv;
+ int status;
usb_reset_configuration(d->udev);
@@ -1208,12 +1213,16 @@
st->dvb_usb_lme2510_firmware = dvb_usb_lme2510_firmware;
- if (lme2510_return_status(d) == 0x44) {
+ status = lme2510_return_status(d);
+ if (status == 0x44) {
*name = lme_firmware_switch(d, 0);
return COLD;
}
- return 0;
+ if (status != 0x47)
+ return -EINVAL;
+
+ return WARM;
}
static int lme2510_get_stream_config(struct dvb_frontend *fe, u8 *ts_type,
diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c
index 9fd43a3..b20f03d 100644
--- a/drivers/media/usb/dvb-usb/cxusb.c
+++ b/drivers/media/usb/dvb-usb/cxusb.c
@@ -820,6 +820,8 @@
case XC2028_RESET_CLK:
deb_info("%s: XC2028_RESET_CLK %d\n", __func__, arg);
break;
+ case XC2028_I2C_FLUSH:
+ break;
default:
deb_info("%s: unknown command %d, arg %d\n", __func__,
command, arg);
diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c
index caa5540..2868766 100644
--- a/drivers/media/usb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/usb/dvb-usb/dib0700_devices.c
@@ -431,6 +431,7 @@
state->dib7000p_ops.set_gpio(adap->fe_adap[0].fe, 8, 0, 1);
break;
case XC2028_RESET_CLK:
+ case XC2028_I2C_FLUSH:
break;
default:
err("%s: unknown command %d, arg %d\n", __func__,
diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c
index 8207e69..bcacb0f 100644
--- a/drivers/media/usb/dvb-usb/dibusb-common.c
+++ b/drivers/media/usb/dvb-usb/dibusb-common.c
@@ -223,8 +223,20 @@
int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val)
{
- u8 wbuf[1] = { offs };
- return dibusb_i2c_msg(d, 0x50, wbuf, 1, val, 1);
+ u8 *buf;
+ int rc;
+
+ buf = kmalloc(2, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ buf[0] = offs;
+
+ rc = dibusb_i2c_msg(d, 0x50, &buf[0], 1, &buf[1], 1);
+ *val = buf[1];
+ kfree(buf);
+
+ return rc;
}
EXPORT_SYMBOL(dibusb_read_eeprom_byte);
diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c
index a61d8fd..a20b60a 100644
--- a/drivers/media/usb/hdpvr/hdpvr-core.c
+++ b/drivers/media/usb/hdpvr/hdpvr-core.c
@@ -295,7 +295,7 @@
/* register v4l2_device early so it can be used for printks */
if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) {
dev_err(&interface->dev, "v4l2_device_register failed\n");
- goto error;
+ goto error_free_dev;
}
mutex_init(&dev->io_mutex);
@@ -304,7 +304,7 @@
dev->usbc_buf = kmalloc(64, GFP_KERNEL);
if (!dev->usbc_buf) {
v4l2_err(&dev->v4l2_dev, "Out of memory\n");
- goto error;
+ goto error_v4l2_unregister;
}
init_waitqueue_head(&dev->wait_buffer);
@@ -342,13 +342,13 @@
}
if (!dev->bulk_in_endpointAddr) {
v4l2_err(&dev->v4l2_dev, "Could not find bulk-in endpoint\n");
- goto error;
+ goto error_put_usb;
}
/* init the device */
if (hdpvr_device_init(dev)) {
v4l2_err(&dev->v4l2_dev, "device init failed\n");
- goto error;
+ goto error_put_usb;
}
mutex_lock(&dev->io_mutex);
@@ -356,7 +356,7 @@
mutex_unlock(&dev->io_mutex);
v4l2_err(&dev->v4l2_dev,
"allocating transfer buffers failed\n");
- goto error;
+ goto error_put_usb;
}
mutex_unlock(&dev->io_mutex);
@@ -364,7 +364,7 @@
retval = hdpvr_register_i2c_adapter(dev);
if (retval < 0) {
v4l2_err(&dev->v4l2_dev, "i2c adapter register failed\n");
- goto error;
+ goto error_free_buffers;
}
client = hdpvr_register_ir_rx_i2c(dev);
@@ -397,13 +397,17 @@
reg_fail:
#if IS_ENABLED(CONFIG_I2C)
i2c_del_adapter(&dev->i2c_adapter);
+error_free_buffers:
#endif
+ hdpvr_free_buffers(dev);
+error_put_usb:
+ usb_put_dev(dev->udev);
+ kfree(dev->usbc_buf);
+error_v4l2_unregister:
+ v4l2_device_unregister(&dev->v4l2_dev);
+error_free_dev:
+ kfree(dev);
error:
- if (dev) {
- flush_work(&dev->worker);
- /* this frees allocated memory */
- hdpvr_delete(dev);
- }
return retval;
}
diff --git a/drivers/media/usb/usbtv/usbtv-core.c b/drivers/media/usb/usbtv/usbtv-core.c
index dc76fd4..0324633 100644
--- a/drivers/media/usb/usbtv/usbtv-core.c
+++ b/drivers/media/usb/usbtv/usbtv-core.c
@@ -141,6 +141,7 @@
static struct usb_device_id usbtv_id_table[] = {
{ USB_DEVICE(0x1b71, 0x3002) },
+ { USB_DEVICE(0x1f71, 0x3301) },
{}
};
MODULE_DEVICE_TABLE(usb, usbtv_id_table);
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index dc51dd8..48a3922 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -18,8 +18,18 @@
#include <linux/videodev2.h>
#include <linux/v4l2-subdev.h>
#include <media/v4l2-dev.h>
+#include <media/v4l2-fh.h>
+#include <media/v4l2-ctrls.h>
#include <media/v4l2-ioctl.h>
+/* Use the same argument order as copy_in_user */
+#define assign_in_user(to, from) \
+({ \
+ typeof(*from) __assign_tmp; \
+ \
+ get_user(__assign_tmp, from) || put_user(__assign_tmp, to); \
+})
+
static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret = -ENOIOCTLCMD;
@@ -33,131 +43,88 @@
struct v4l2_clip32 {
struct v4l2_rect c;
- compat_caddr_t next;
+ compat_caddr_t next;
};
struct v4l2_window32 {
struct v4l2_rect w;
- __u32 field; /* enum v4l2_field */
+ __u32 field; /* enum v4l2_field */
__u32 chromakey;
compat_caddr_t clips; /* actually struct v4l2_clip32 * */
__u32 clipcount;
compat_caddr_t bitmap;
+ __u8 global_alpha;
};
-static int get_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up)
+static int get_v4l2_window32(struct v4l2_window __user *kp,
+ struct v4l2_window32 __user *up,
+ void __user *aux_buf, u32 aux_space)
{
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_window32)) ||
- copy_from_user(&kp->w, &up->w, sizeof(up->w)) ||
- get_user(kp->field, &up->field) ||
- get_user(kp->chromakey, &up->chromakey) ||
- get_user(kp->clipcount, &up->clipcount))
- return -EFAULT;
- if (kp->clipcount > 2048)
+ struct v4l2_clip32 __user *uclips;
+ struct v4l2_clip __user *kclips;
+ compat_caddr_t p;
+ u32 clipcount;
+
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ copy_in_user(&kp->w, &up->w, sizeof(up->w)) ||
+ assign_in_user(&kp->field, &up->field) ||
+ assign_in_user(&kp->chromakey, &up->chromakey) ||
+ assign_in_user(&kp->global_alpha, &up->global_alpha) ||
+ get_user(clipcount, &up->clipcount) ||
+ put_user(clipcount, &kp->clipcount))
+ return -EFAULT;
+ if (clipcount > 2048)
return -EINVAL;
- if (kp->clipcount) {
- struct v4l2_clip32 __user *uclips;
- struct v4l2_clip __user *kclips;
- int n = kp->clipcount;
- compat_caddr_t p;
+ if (!clipcount)
+ return put_user(NULL, &kp->clips);
- if (get_user(p, &up->clips))
+ if (get_user(p, &up->clips))
+ return -EFAULT;
+ uclips = compat_ptr(p);
+ if (aux_space < clipcount * sizeof(*kclips))
+ return -EFAULT;
+ kclips = aux_buf;
+ if (put_user(kclips, &kp->clips))
+ return -EFAULT;
+
+ while (clipcount--) {
+ if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c)))
return -EFAULT;
- uclips = compat_ptr(p);
- kclips = compat_alloc_user_space(n * sizeof(struct v4l2_clip));
- kp->clips = kclips;
- while (--n >= 0) {
- if (copy_in_user(&kclips->c, &uclips->c, sizeof(uclips->c)))
- return -EFAULT;
- if (put_user(n ? kclips + 1 : NULL, &kclips->next))
- return -EFAULT;
- uclips += 1;
- kclips += 1;
- }
- } else
- kp->clips = NULL;
- return 0;
-}
-
-static int put_v4l2_window32(struct v4l2_window *kp, struct v4l2_window32 __user *up)
-{
- if (copy_to_user(&up->w, &kp->w, sizeof(kp->w)) ||
- put_user(kp->field, &up->field) ||
- put_user(kp->chromakey, &up->chromakey) ||
- put_user(kp->clipcount, &up->clipcount))
+ if (put_user(clipcount ? kclips + 1 : NULL, &kclips->next))
return -EFAULT;
+ uclips++;
+ kclips++;
+ }
return 0;
}
-static inline int get_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up)
+static int put_v4l2_window32(struct v4l2_window __user *kp,
+ struct v4l2_window32 __user *up)
{
- if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format)))
- return -EFAULT;
- return 0;
-}
+ struct v4l2_clip __user *kclips = kp->clips;
+ struct v4l2_clip32 __user *uclips;
+ compat_caddr_t p;
+ u32 clipcount;
-static inline int get_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp,
- struct v4l2_pix_format_mplane __user *up)
-{
- if (copy_from_user(kp, up, sizeof(struct v4l2_pix_format_mplane)))
+ if (copy_in_user(&up->w, &kp->w, sizeof(kp->w)) ||
+ assign_in_user(&up->field, &kp->field) ||
+ assign_in_user(&up->chromakey, &kp->chromakey) ||
+ assign_in_user(&up->global_alpha, &kp->global_alpha) ||
+ get_user(clipcount, &kp->clipcount) ||
+ put_user(clipcount, &up->clipcount))
return -EFAULT;
- return 0;
-}
+ if (!clipcount)
+ return 0;
-static inline int put_v4l2_pix_format(struct v4l2_pix_format *kp, struct v4l2_pix_format __user *up)
-{
- if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format)))
+ if (get_user(p, &up->clips))
return -EFAULT;
- return 0;
-}
-
-static inline int put_v4l2_pix_format_mplane(struct v4l2_pix_format_mplane *kp,
- struct v4l2_pix_format_mplane __user *up)
-{
- if (copy_to_user(up, kp, sizeof(struct v4l2_pix_format_mplane)))
- return -EFAULT;
- return 0;
-}
-
-static inline int get_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up)
-{
- if (copy_from_user(kp, up, sizeof(struct v4l2_vbi_format)))
- return -EFAULT;
- return 0;
-}
-
-static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vbi_format __user *up)
-{
- if (copy_to_user(up, kp, sizeof(struct v4l2_vbi_format)))
- return -EFAULT;
- return 0;
-}
-
-static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
-{
- if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format)))
- return -EFAULT;
- return 0;
-}
-
-static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
-{
- if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format)))
- return -EFAULT;
- return 0;
-}
-
-static inline int get_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up)
-{
- if (copy_from_user(kp, up, sizeof(struct v4l2_sdr_format)))
- return -EFAULT;
- return 0;
-}
-
-static inline int put_v4l2_sdr_format(struct v4l2_sdr_format *kp, struct v4l2_sdr_format __user *up)
-{
- if (copy_to_user(up, kp, sizeof(struct v4l2_sdr_format)))
- return -EFAULT;
+ uclips = compat_ptr(p);
+ while (clipcount--) {
+ if (copy_in_user(&uclips->c, &kclips->c, sizeof(uclips->c)))
+ return -EFAULT;
+ uclips++;
+ kclips++;
+ }
return 0;
}
@@ -191,97 +158,158 @@
__u32 reserved[8];
};
-static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up)
+static int __bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size)
{
- if (get_user(kp->type, &up->type))
+ u32 type;
+
+ if (get_user(type, &up->type))
return -EFAULT;
- switch (kp->type) {
+ switch (type) {
+ case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+ case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: {
+ u32 clipcount;
+
+ if (get_user(clipcount, &up->fmt.win.clipcount))
+ return -EFAULT;
+ if (clipcount > 2048)
+ return -EINVAL;
+ *size = clipcount * sizeof(struct v4l2_clip);
+ return 0;
+ }
+ default:
+ *size = 0;
+ return 0;
+ }
+}
+
+static int bufsize_v4l2_format(struct v4l2_format32 __user *up, u32 *size)
+{
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)))
+ return -EFAULT;
+ return __bufsize_v4l2_format(up, size);
+}
+
+static int __get_v4l2_format32(struct v4l2_format __user *kp,
+ struct v4l2_format32 __user *up,
+ void __user *aux_buf, u32 aux_space)
+{
+ u32 type;
+
+ if (get_user(type, &up->type) || put_user(type, &kp->type))
+ return -EFAULT;
+
+ switch (type) {
case V4L2_BUF_TYPE_VIDEO_CAPTURE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT:
- return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
+ return copy_in_user(&kp->fmt.pix, &up->fmt.pix,
+ sizeof(kp->fmt.pix)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
- return get_v4l2_pix_format_mplane(&kp->fmt.pix_mp,
- &up->fmt.pix_mp);
+ return copy_in_user(&kp->fmt.pix_mp, &up->fmt.pix_mp,
+ sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_VIDEO_OVERLAY:
case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
- return get_v4l2_window32(&kp->fmt.win, &up->fmt.win);
+ return get_v4l2_window32(&kp->fmt.win, &up->fmt.win,
+ aux_buf, aux_space);
case V4L2_BUF_TYPE_VBI_CAPTURE:
case V4L2_BUF_TYPE_VBI_OUTPUT:
- return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+ return copy_in_user(&kp->fmt.vbi, &up->fmt.vbi,
+ sizeof(kp->fmt.vbi)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
- return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+ return copy_in_user(&kp->fmt.sliced, &up->fmt.sliced,
+ sizeof(kp->fmt.sliced)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_SDR_CAPTURE:
case V4L2_BUF_TYPE_SDR_OUTPUT:
- return get_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr);
+ return copy_in_user(&kp->fmt.sdr, &up->fmt.sdr,
+ sizeof(kp->fmt.sdr)) ? -EFAULT : 0;
default:
- pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
- kp->type);
return -EINVAL;
}
}
-static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up)
+static int get_v4l2_format32(struct v4l2_format __user *kp,
+ struct v4l2_format32 __user *up,
+ void __user *aux_buf, u32 aux_space)
{
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32)))
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)))
return -EFAULT;
- return __get_v4l2_format32(kp, up);
+ return __get_v4l2_format32(kp, up, aux_buf, aux_space);
}
-static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up)
+static int bufsize_v4l2_create(struct v4l2_create_buffers32 __user *up,
+ u32 *size)
{
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) ||
- copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format)))
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)))
return -EFAULT;
- return __get_v4l2_format32(&kp->format, &up->format);
+ return __bufsize_v4l2_format(&up->format, size);
}
-static int __put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up)
+static int get_v4l2_create32(struct v4l2_create_buffers __user *kp,
+ struct v4l2_create_buffers32 __user *up,
+ void __user *aux_buf, u32 aux_space)
{
- if (put_user(kp->type, &up->type))
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ copy_in_user(kp, up,
+ offsetof(struct v4l2_create_buffers32, format)))
+ return -EFAULT;
+ return __get_v4l2_format32(&kp->format, &up->format,
+ aux_buf, aux_space);
+}
+
+static int __put_v4l2_format32(struct v4l2_format __user *kp,
+ struct v4l2_format32 __user *up)
+{
+ u32 type;
+
+ if (get_user(type, &kp->type))
return -EFAULT;
- switch (kp->type) {
+ switch (type) {
case V4L2_BUF_TYPE_VIDEO_CAPTURE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT:
- return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
+ return copy_in_user(&up->fmt.pix, &kp->fmt.pix,
+ sizeof(kp->fmt.pix)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
- return put_v4l2_pix_format_mplane(&kp->fmt.pix_mp,
- &up->fmt.pix_mp);
+ return copy_in_user(&up->fmt.pix_mp, &kp->fmt.pix_mp,
+ sizeof(kp->fmt.pix_mp)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_VIDEO_OVERLAY:
case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
return put_v4l2_window32(&kp->fmt.win, &up->fmt.win);
case V4L2_BUF_TYPE_VBI_CAPTURE:
case V4L2_BUF_TYPE_VBI_OUTPUT:
- return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+ return copy_in_user(&up->fmt.vbi, &kp->fmt.vbi,
+ sizeof(kp->fmt.vbi)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
- return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+ return copy_in_user(&up->fmt.sliced, &kp->fmt.sliced,
+ sizeof(kp->fmt.sliced)) ? -EFAULT : 0;
case V4L2_BUF_TYPE_SDR_CAPTURE:
case V4L2_BUF_TYPE_SDR_OUTPUT:
- return put_v4l2_sdr_format(&kp->fmt.sdr, &up->fmt.sdr);
+ return copy_in_user(&up->fmt.sdr, &kp->fmt.sdr,
+ sizeof(kp->fmt.sdr)) ? -EFAULT : 0;
default:
- pr_info("compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
- kp->type);
return -EINVAL;
}
}
-static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up)
+static int put_v4l2_format32(struct v4l2_format __user *kp,
+ struct v4l2_format32 __user *up)
{
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_format32)))
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
return -EFAULT;
return __put_v4l2_format32(kp, up);
}
-static int put_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up)
+static int put_v4l2_create32(struct v4l2_create_buffers __user *kp,
+ struct v4l2_create_buffers32 __user *up)
{
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_create_buffers32)) ||
- copy_to_user(up, kp, offsetof(struct v4l2_create_buffers32, format)) ||
- copy_to_user(up->reserved, kp->reserved, sizeof(kp->reserved)))
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ copy_in_user(up, kp,
+ offsetof(struct v4l2_create_buffers32, format)) ||
+ copy_in_user(up->reserved, kp->reserved, sizeof(kp->reserved)))
return -EFAULT;
return __put_v4l2_format32(&kp->format, &up->format);
}
@@ -295,25 +323,28 @@
__u32 reserved[4];
};
-static int get_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up)
+static int get_v4l2_standard32(struct v4l2_standard __user *kp,
+ struct v4l2_standard32 __user *up)
{
/* other fields are not set by the user, nor used by the driver */
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_standard32)) ||
- get_user(kp->index, &up->index))
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ assign_in_user(&kp->index, &up->index))
return -EFAULT;
return 0;
}
-static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32 __user *up)
+static int put_v4l2_standard32(struct v4l2_standard __user *kp,
+ struct v4l2_standard32 __user *up)
{
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_standard32)) ||
- put_user(kp->index, &up->index) ||
- put_user(kp->id, &up->id) ||
- copy_to_user(up->name, kp->name, 24) ||
- copy_to_user(&up->frameperiod, &kp->frameperiod, sizeof(kp->frameperiod)) ||
- put_user(kp->framelines, &up->framelines) ||
- copy_to_user(up->reserved, kp->reserved, 4 * sizeof(__u32)))
- return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ assign_in_user(&up->index, &kp->index) ||
+ assign_in_user(&up->id, &kp->id) ||
+ copy_in_user(up->name, kp->name, sizeof(up->name)) ||
+ copy_in_user(&up->frameperiod, &kp->frameperiod,
+ sizeof(up->frameperiod)) ||
+ assign_in_user(&up->framelines, &kp->framelines) ||
+ copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)))
+ return -EFAULT;
return 0;
}
@@ -352,134 +383,186 @@
__u32 reserved;
};
-static int get_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32,
- enum v4l2_memory memory)
+static int get_v4l2_plane32(struct v4l2_plane __user *up,
+ struct v4l2_plane32 __user *up32,
+ enum v4l2_memory memory)
{
- void __user *up_pln;
- compat_long_t p;
+ compat_ulong_t p;
if (copy_in_user(up, up32, 2 * sizeof(__u32)) ||
- copy_in_user(&up->data_offset, &up32->data_offset,
- sizeof(__u32)))
+ copy_in_user(&up->data_offset, &up32->data_offset,
+ sizeof(up->data_offset)))
return -EFAULT;
- if (memory == V4L2_MEMORY_USERPTR) {
- if (get_user(p, &up32->m.userptr))
- return -EFAULT;
- up_pln = compat_ptr(p);
- if (put_user((unsigned long)up_pln, &up->m.userptr))
- return -EFAULT;
- } else if (memory == V4L2_MEMORY_DMABUF) {
- if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int)))
- return -EFAULT;
- } else {
+ switch (memory) {
+ case V4L2_MEMORY_MMAP:
+ case V4L2_MEMORY_OVERLAY:
if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset,
- sizeof(__u32)))
+ sizeof(up32->m.mem_offset)))
return -EFAULT;
+ break;
+ case V4L2_MEMORY_USERPTR:
+ if (get_user(p, &up32->m.userptr) ||
+ put_user((unsigned long)compat_ptr(p), &up->m.userptr))
+ return -EFAULT;
+ break;
+ case V4L2_MEMORY_DMABUF:
+ if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(up32->m.fd)))
+ return -EFAULT;
+ break;
}
return 0;
}
-static int put_v4l2_plane32(struct v4l2_plane __user *up, struct v4l2_plane32 __user *up32,
- enum v4l2_memory memory)
+static int put_v4l2_plane32(struct v4l2_plane __user *up,
+ struct v4l2_plane32 __user *up32,
+ enum v4l2_memory memory)
{
+ unsigned long p;
+
if (copy_in_user(up32, up, 2 * sizeof(__u32)) ||
- copy_in_user(&up32->data_offset, &up->data_offset,
- sizeof(__u32)))
+ copy_in_user(&up32->data_offset, &up->data_offset,
+ sizeof(up->data_offset)))
return -EFAULT;
- /* For MMAP, driver might've set up the offset, so copy it back.
- * USERPTR stays the same (was userspace-provided), so no copying. */
- if (memory == V4L2_MEMORY_MMAP)
+ switch (memory) {
+ case V4L2_MEMORY_MMAP:
+ case V4L2_MEMORY_OVERLAY:
if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset,
- sizeof(__u32)))
+ sizeof(up->m.mem_offset)))
return -EFAULT;
- /* For DMABUF, driver might've set up the fd, so copy it back. */
- if (memory == V4L2_MEMORY_DMABUF)
- if (copy_in_user(&up32->m.fd, &up->m.fd,
- sizeof(int)))
+ break;
+ case V4L2_MEMORY_USERPTR:
+ if (get_user(p, &up->m.userptr) ||
+ put_user((compat_ulong_t)ptr_to_compat((__force void *)p),
+ &up32->m.userptr))
return -EFAULT;
+ break;
+ case V4L2_MEMORY_DMABUF:
+ if (copy_in_user(&up32->m.fd, &up->m.fd, sizeof(up->m.fd)))
+ return -EFAULT;
+ break;
+ }
return 0;
}
-static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up)
+static int bufsize_v4l2_buffer(struct v4l2_buffer32 __user *up, u32 *size)
{
+ u32 type;
+ u32 length;
+
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ get_user(type, &up->type) ||
+ get_user(length, &up->length))
+ return -EFAULT;
+
+ if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
+ if (length > VIDEO_MAX_PLANES)
+ return -EINVAL;
+
+ /*
+ * We don't really care if userspace decides to kill itself
+ * by passing a very big length value
+ */
+ *size = length * sizeof(struct v4l2_plane);
+ } else {
+ *size = 0;
+ }
+ return 0;
+}
+
+static int get_v4l2_buffer32(struct v4l2_buffer __user *kp,
+ struct v4l2_buffer32 __user *up,
+ void __user *aux_buf, u32 aux_space)
+{
+ u32 type;
+ u32 length;
+ enum v4l2_memory memory;
struct v4l2_plane32 __user *uplane32;
struct v4l2_plane __user *uplane;
compat_caddr_t p;
- int num_planes;
int ret;
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_buffer32)) ||
- get_user(kp->index, &up->index) ||
- get_user(kp->type, &up->type) ||
- get_user(kp->flags, &up->flags) ||
- get_user(kp->memory, &up->memory) ||
- get_user(kp->length, &up->length))
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ assign_in_user(&kp->index, &up->index) ||
+ get_user(type, &up->type) ||
+ put_user(type, &kp->type) ||
+ assign_in_user(&kp->flags, &up->flags) ||
+ get_user(memory, &up->memory) ||
+ put_user(memory, &kp->memory) ||
+ get_user(length, &up->length) ||
+ put_user(length, &kp->length))
+ return -EFAULT;
+
+ if (V4L2_TYPE_IS_OUTPUT(type))
+ if (assign_in_user(&kp->bytesused, &up->bytesused) ||
+ assign_in_user(&kp->field, &up->field) ||
+ assign_in_user(&kp->timestamp.tv_sec,
+ &up->timestamp.tv_sec) ||
+ assign_in_user(&kp->timestamp.tv_usec,
+ &up->timestamp.tv_usec))
return -EFAULT;
- if (V4L2_TYPE_IS_OUTPUT(kp->type))
- if (get_user(kp->bytesused, &up->bytesused) ||
- get_user(kp->field, &up->field) ||
- get_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) ||
- get_user(kp->timestamp.tv_usec,
- &up->timestamp.tv_usec))
- return -EFAULT;
+ if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
+ u32 num_planes = length;
- if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) {
- num_planes = kp->length;
if (num_planes == 0) {
- kp->m.planes = NULL;
- /* num_planes == 0 is legal, e.g. when userspace doesn't
- * need planes array on DQBUF*/
- return 0;
+ /*
+ * num_planes == 0 is legal, e.g. when userspace doesn't
+ * need planes array on DQBUF
+ */
+ return put_user(NULL, &kp->m.planes);
}
+ if (num_planes > VIDEO_MAX_PLANES)
+ return -EINVAL;
if (get_user(p, &up->m.planes))
return -EFAULT;
uplane32 = compat_ptr(p);
if (!access_ok(VERIFY_READ, uplane32,
- num_planes * sizeof(struct v4l2_plane32)))
+ num_planes * sizeof(*uplane32)))
return -EFAULT;
- /* We don't really care if userspace decides to kill itself
- * by passing a very big num_planes value */
- uplane = compat_alloc_user_space(num_planes *
- sizeof(struct v4l2_plane));
- kp->m.planes = (__force struct v4l2_plane *)uplane;
+ /*
+ * We don't really care if userspace decides to kill itself
+ * by passing a very big num_planes value
+ */
+ if (aux_space < num_planes * sizeof(*uplane))
+ return -EFAULT;
- while (--num_planes >= 0) {
- ret = get_v4l2_plane32(uplane, uplane32, kp->memory);
+ uplane = aux_buf;
+ if (put_user((__force struct v4l2_plane *)uplane,
+ &kp->m.planes))
+ return -EFAULT;
+
+ while (num_planes--) {
+ ret = get_v4l2_plane32(uplane, uplane32, memory);
if (ret)
return ret;
- ++uplane;
- ++uplane32;
+ uplane++;
+ uplane32++;
}
} else {
- switch (kp->memory) {
+ switch (memory) {
case V4L2_MEMORY_MMAP:
- if (get_user(kp->m.offset, &up->m.offset))
- return -EFAULT;
- break;
- case V4L2_MEMORY_USERPTR:
- {
- compat_long_t tmp;
-
- if (get_user(tmp, &up->m.userptr))
- return -EFAULT;
-
- kp->m.userptr = (unsigned long)compat_ptr(tmp);
- }
- break;
case V4L2_MEMORY_OVERLAY:
- if (get_user(kp->m.offset, &up->m.offset))
+ if (assign_in_user(&kp->m.offset, &up->m.offset))
return -EFAULT;
break;
+ case V4L2_MEMORY_USERPTR: {
+ compat_ulong_t userptr;
+
+ if (get_user(userptr, &up->m.userptr) ||
+ put_user((unsigned long)compat_ptr(userptr),
+ &kp->m.userptr))
+ return -EFAULT;
+ break;
+ }
case V4L2_MEMORY_DMABUF:
- if (get_user(kp->m.fd, &up->m.fd))
+ if (assign_in_user(&kp->m.fd, &up->m.fd))
return -EFAULT;
break;
}
@@ -488,65 +571,70 @@
return 0;
}
-static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user *up)
+static int put_v4l2_buffer32(struct v4l2_buffer __user *kp,
+ struct v4l2_buffer32 __user *up)
{
+ u32 type;
+ u32 length;
+ enum v4l2_memory memory;
struct v4l2_plane32 __user *uplane32;
struct v4l2_plane __user *uplane;
compat_caddr_t p;
- int num_planes;
int ret;
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_buffer32)) ||
- put_user(kp->index, &up->index) ||
- put_user(kp->type, &up->type) ||
- put_user(kp->flags, &up->flags) ||
- put_user(kp->memory, &up->memory))
- return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ assign_in_user(&up->index, &kp->index) ||
+ get_user(type, &kp->type) ||
+ put_user(type, &up->type) ||
+ assign_in_user(&up->flags, &kp->flags) ||
+ get_user(memory, &kp->memory) ||
+ put_user(memory, &up->memory))
+ return -EFAULT;
- if (put_user(kp->bytesused, &up->bytesused) ||
- put_user(kp->field, &up->field) ||
- put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) ||
- put_user(kp->timestamp.tv_usec, &up->timestamp.tv_usec) ||
- copy_to_user(&up->timecode, &kp->timecode, sizeof(struct v4l2_timecode)) ||
- put_user(kp->sequence, &up->sequence) ||
- put_user(kp->reserved2, &up->reserved2) ||
- put_user(kp->reserved, &up->reserved) ||
- put_user(kp->length, &up->length))
- return -EFAULT;
+ if (assign_in_user(&up->bytesused, &kp->bytesused) ||
+ assign_in_user(&up->field, &kp->field) ||
+ assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) ||
+ assign_in_user(&up->timestamp.tv_usec, &kp->timestamp.tv_usec) ||
+ copy_in_user(&up->timecode, &kp->timecode, sizeof(kp->timecode)) ||
+ assign_in_user(&up->sequence, &kp->sequence) ||
+ assign_in_user(&up->reserved2, &kp->reserved2) ||
+ assign_in_user(&up->reserved, &kp->reserved) ||
+ get_user(length, &kp->length) ||
+ put_user(length, &up->length))
+ return -EFAULT;
- if (V4L2_TYPE_IS_MULTIPLANAR(kp->type)) {
- num_planes = kp->length;
+ if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
+ u32 num_planes = length;
+
if (num_planes == 0)
return 0;
- uplane = (__force struct v4l2_plane __user *)kp->m.planes;
+ if (get_user(uplane, ((__force struct v4l2_plane __user **)&kp->m.planes)))
+ return -EFAULT;
if (get_user(p, &up->m.planes))
return -EFAULT;
uplane32 = compat_ptr(p);
- while (--num_planes >= 0) {
- ret = put_v4l2_plane32(uplane, uplane32, kp->memory);
+ while (num_planes--) {
+ ret = put_v4l2_plane32(uplane, uplane32, memory);
if (ret)
return ret;
++uplane;
++uplane32;
}
} else {
- switch (kp->memory) {
+ switch (memory) {
case V4L2_MEMORY_MMAP:
- if (put_user(kp->m.offset, &up->m.offset))
+ case V4L2_MEMORY_OVERLAY:
+ if (assign_in_user(&up->m.offset, &kp->m.offset))
return -EFAULT;
break;
case V4L2_MEMORY_USERPTR:
- if (put_user(kp->m.userptr, &up->m.userptr))
- return -EFAULT;
- break;
- case V4L2_MEMORY_OVERLAY:
- if (put_user(kp->m.offset, &up->m.offset))
+ if (assign_in_user(&up->m.userptr, &kp->m.userptr))
return -EFAULT;
break;
case V4L2_MEMORY_DMABUF:
- if (put_user(kp->m.fd, &up->m.fd))
+ if (assign_in_user(&up->m.fd, &kp->m.fd))
return -EFAULT;
break;
}
@@ -558,7 +646,7 @@
struct v4l2_framebuffer32 {
__u32 capability;
__u32 flags;
- compat_caddr_t base;
+ compat_caddr_t base;
struct {
__u32 width;
__u32 height;
@@ -571,30 +659,33 @@
} fmt;
};
-static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up)
+static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp,
+ struct v4l2_framebuffer32 __user *up)
{
- u32 tmp;
+ compat_caddr_t tmp;
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) ||
- get_user(tmp, &up->base) ||
- get_user(kp->capability, &up->capability) ||
- get_user(kp->flags, &up->flags) ||
- copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt)))
- return -EFAULT;
- kp->base = (__force void *)compat_ptr(tmp);
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ get_user(tmp, &up->base) ||
+ put_user((__force void *)compat_ptr(tmp), &kp->base) ||
+ assign_in_user(&kp->capability, &up->capability) ||
+ assign_in_user(&kp->flags, &up->flags) ||
+ copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt)))
+ return -EFAULT;
return 0;
}
-static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up)
+static int put_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp,
+ struct v4l2_framebuffer32 __user *up)
{
- u32 tmp = (u32)((unsigned long)kp->base);
+ void *base;
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) ||
- put_user(tmp, &up->base) ||
- put_user(kp->capability, &up->capability) ||
- put_user(kp->flags, &up->flags) ||
- copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt)))
- return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ get_user(base, &kp->base) ||
+ put_user(ptr_to_compat(base), &up->base) ||
+ assign_in_user(&up->capability, &kp->capability) ||
+ assign_in_user(&up->flags, &kp->flags) ||
+ copy_in_user(&up->fmt, &kp->fmt, sizeof(kp->fmt)))
+ return -EFAULT;
return 0;
}
@@ -606,21 +697,26 @@
__u32 tuner; /* Associated tuner */
compat_u64 std;
__u32 status;
- __u32 reserved[4];
+ __u32 capabilities;
+ __u32 reserved[3];
};
-/* The 64-bit v4l2_input struct has extra padding at the end of the struct.
- Otherwise it is identical to the 32-bit version. */
-static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
+/*
+ * The 64-bit v4l2_input struct has extra padding at the end of the struct.
+ * Otherwise it is identical to the 32-bit version.
+ */
+static inline int get_v4l2_input32(struct v4l2_input __user *kp,
+ struct v4l2_input32 __user *up)
{
- if (copy_from_user(kp, up, sizeof(struct v4l2_input32)))
+ if (copy_in_user(kp, up, sizeof(*up)))
return -EFAULT;
return 0;
}
-static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
+static inline int put_v4l2_input32(struct v4l2_input __user *kp,
+ struct v4l2_input32 __user *up)
{
- if (copy_to_user(up, kp, sizeof(struct v4l2_input32)))
+ if (copy_in_user(up, kp, sizeof(*up)))
return -EFAULT;
return 0;
}
@@ -644,58 +740,95 @@
};
} __attribute__ ((packed));
-/* The following function really belong in v4l2-common, but that causes
- a circular dependency between modules. We need to think about this, but
- for now this will do. */
-
-/* Return non-zero if this control is a pointer type. Currently only
- type STRING is a pointer type. */
-static inline int ctrl_is_pointer(u32 id)
+/* Return true if this control is a pointer type. */
+static inline bool ctrl_is_pointer(struct file *file, u32 id)
{
- switch (id) {
- case V4L2_CID_RDS_TX_PS_NAME:
- case V4L2_CID_RDS_TX_RADIO_TEXT:
- return 1;
- default:
- return 0;
+ struct video_device *vdev = video_devdata(file);
+ struct v4l2_fh *fh = NULL;
+ struct v4l2_ctrl_handler *hdl = NULL;
+ struct v4l2_query_ext_ctrl qec = { id };
+ const struct v4l2_ioctl_ops *ops = vdev->ioctl_ops;
+
+ if (test_bit(V4L2_FL_USES_V4L2_FH, &vdev->flags))
+ fh = file->private_data;
+
+ if (fh && fh->ctrl_handler)
+ hdl = fh->ctrl_handler;
+ else if (vdev->ctrl_handler)
+ hdl = vdev->ctrl_handler;
+
+ if (hdl) {
+ struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, id);
+
+ return ctrl && ctrl->is_ptr;
}
+
+ if (!ops || !ops->vidioc_query_ext_ctrl)
+ return false;
+
+ return !ops->vidioc_query_ext_ctrl(file, fh, &qec) &&
+ (qec.flags & V4L2_CTRL_FLAG_HAS_PAYLOAD);
}
-static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
+static int bufsize_v4l2_ext_controls(struct v4l2_ext_controls32 __user *up,
+ u32 *size)
+{
+ u32 count;
+
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ get_user(count, &up->count))
+ return -EFAULT;
+ if (count > V4L2_CID_MAX_CTRLS)
+ return -EINVAL;
+ *size = count * sizeof(struct v4l2_ext_control);
+ return 0;
+}
+
+static int get_v4l2_ext_controls32(struct file *file,
+ struct v4l2_ext_controls __user *kp,
+ struct v4l2_ext_controls32 __user *up,
+ void __user *aux_buf, u32 aux_space)
{
struct v4l2_ext_control32 __user *ucontrols;
struct v4l2_ext_control __user *kcontrols;
- int n;
+ u32 count;
+ u32 n;
compat_caddr_t p;
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) ||
- get_user(kp->which, &up->which) ||
- get_user(kp->count, &up->count) ||
- get_user(kp->error_idx, &up->error_idx) ||
- copy_from_user(kp->reserved, up->reserved,
- sizeof(kp->reserved)))
- return -EFAULT;
- n = kp->count;
- if (n == 0) {
- kp->controls = NULL;
- return 0;
- }
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ assign_in_user(&kp->which, &up->which) ||
+ get_user(count, &up->count) ||
+ put_user(count, &kp->count) ||
+ assign_in_user(&kp->error_idx, &up->error_idx) ||
+ copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved)))
+ return -EFAULT;
+
+ if (count == 0)
+ return put_user(NULL, &kp->controls);
+ if (count > V4L2_CID_MAX_CTRLS)
+ return -EINVAL;
if (get_user(p, &up->controls))
return -EFAULT;
ucontrols = compat_ptr(p);
- if (!access_ok(VERIFY_READ, ucontrols,
- n * sizeof(struct v4l2_ext_control32)))
+ if (!access_ok(VERIFY_READ, ucontrols, count * sizeof(*ucontrols)))
return -EFAULT;
- kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control));
- kp->controls = (__force struct v4l2_ext_control *)kcontrols;
- while (--n >= 0) {
+ if (aux_space < count * sizeof(*kcontrols))
+ return -EFAULT;
+ kcontrols = aux_buf;
+ if (put_user((__force struct v4l2_ext_control *)kcontrols,
+ &kp->controls))
+ return -EFAULT;
+
+ for (n = 0; n < count; n++) {
u32 id;
if (copy_in_user(kcontrols, ucontrols, sizeof(*ucontrols)))
return -EFAULT;
+
if (get_user(id, &kcontrols->id))
return -EFAULT;
- if (ctrl_is_pointer(id)) {
+
+ if (ctrl_is_pointer(file, id)) {
void __user *s;
if (get_user(p, &ucontrols->string))
@@ -710,43 +843,55 @@
return 0;
}
-static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
+static int put_v4l2_ext_controls32(struct file *file,
+ struct v4l2_ext_controls __user *kp,
+ struct v4l2_ext_controls32 __user *up)
{
struct v4l2_ext_control32 __user *ucontrols;
- struct v4l2_ext_control __user *kcontrols =
- (__force struct v4l2_ext_control __user *)kp->controls;
- int n = kp->count;
+ struct v4l2_ext_control __user *kcontrols;
+ u32 count;
+ u32 n;
compat_caddr_t p;
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) ||
- put_user(kp->which, &up->which) ||
- put_user(kp->count, &up->count) ||
- put_user(kp->error_idx, &up->error_idx) ||
- copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved)))
- return -EFAULT;
- if (!kp->count)
- return 0;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ assign_in_user(&up->which, &kp->which) ||
+ get_user(count, &kp->count) ||
+ put_user(count, &up->count) ||
+ assign_in_user(&up->error_idx, &kp->error_idx) ||
+ copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)) ||
+ get_user(kcontrols, &kp->controls))
+ return -EFAULT;
+ if (!count)
+ return 0;
if (get_user(p, &up->controls))
return -EFAULT;
ucontrols = compat_ptr(p);
- if (!access_ok(VERIFY_WRITE, ucontrols,
- n * sizeof(struct v4l2_ext_control32)))
+ if (!access_ok(VERIFY_WRITE, ucontrols, count * sizeof(*ucontrols)))
return -EFAULT;
- while (--n >= 0) {
- unsigned size = sizeof(*ucontrols);
+ for (n = 0; n < count; n++) {
+ unsigned int size = sizeof(*ucontrols);
u32 id;
- if (get_user(id, &kcontrols->id))
+ if (get_user(id, &kcontrols->id) ||
+ put_user(id, &ucontrols->id) ||
+ assign_in_user(&ucontrols->size, &kcontrols->size) ||
+ copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2,
+ sizeof(ucontrols->reserved2)))
return -EFAULT;
- /* Do not modify the pointer when copying a pointer control.
- The contents of the pointer was changed, not the pointer
- itself. */
- if (ctrl_is_pointer(id))
+
+ /*
+ * Do not modify the pointer when copying a pointer control.
+ * The contents of the pointer was changed, not the pointer
+ * itself.
+ */
+ if (ctrl_is_pointer(file, id))
size -= sizeof(ucontrols->value64);
+
if (copy_in_user(ucontrols, kcontrols, size))
return -EFAULT;
+
ucontrols++;
kcontrols++;
}
@@ -766,18 +911,19 @@
__u32 reserved[8];
};
-static int put_v4l2_event32(struct v4l2_event *kp, struct v4l2_event32 __user *up)
+static int put_v4l2_event32(struct v4l2_event __user *kp,
+ struct v4l2_event32 __user *up)
{
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_event32)) ||
- put_user(kp->type, &up->type) ||
- copy_to_user(&up->u, &kp->u, sizeof(kp->u)) ||
- put_user(kp->pending, &up->pending) ||
- put_user(kp->sequence, &up->sequence) ||
- put_user(kp->timestamp.tv_sec, &up->timestamp.tv_sec) ||
- put_user(kp->timestamp.tv_nsec, &up->timestamp.tv_nsec) ||
- put_user(kp->id, &up->id) ||
- copy_to_user(up->reserved, kp->reserved, 8 * sizeof(__u32)))
- return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ assign_in_user(&up->type, &kp->type) ||
+ copy_in_user(&up->u, &kp->u, sizeof(kp->u)) ||
+ assign_in_user(&up->pending, &kp->pending) ||
+ assign_in_user(&up->sequence, &kp->sequence) ||
+ assign_in_user(&up->timestamp.tv_sec, &kp->timestamp.tv_sec) ||
+ assign_in_user(&up->timestamp.tv_nsec, &kp->timestamp.tv_nsec) ||
+ assign_in_user(&up->id, &kp->id) ||
+ copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)))
+ return -EFAULT;
return 0;
}
@@ -789,32 +935,35 @@
compat_caddr_t edid;
};
-static int get_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up)
+static int get_v4l2_edid32(struct v4l2_edid __user *kp,
+ struct v4l2_edid32 __user *up)
{
- u32 tmp;
+ compat_uptr_t tmp;
- if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_edid32)) ||
- get_user(kp->pad, &up->pad) ||
- get_user(kp->start_block, &up->start_block) ||
- get_user(kp->blocks, &up->blocks) ||
- get_user(tmp, &up->edid) ||
- copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved)))
- return -EFAULT;
- kp->edid = (__force u8 *)compat_ptr(tmp);
+ if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
+ assign_in_user(&kp->pad, &up->pad) ||
+ assign_in_user(&kp->start_block, &up->start_block) ||
+ assign_in_user(&kp->blocks, &up->blocks) ||
+ get_user(tmp, &up->edid) ||
+ put_user(compat_ptr(tmp), &kp->edid) ||
+ copy_in_user(kp->reserved, up->reserved, sizeof(kp->reserved)))
+ return -EFAULT;
return 0;
}
-static int put_v4l2_edid32(struct v4l2_edid *kp, struct v4l2_edid32 __user *up)
+static int put_v4l2_edid32(struct v4l2_edid __user *kp,
+ struct v4l2_edid32 __user *up)
{
- u32 tmp = (u32)((unsigned long)kp->edid);
+ void *edid;
- if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_edid32)) ||
- put_user(kp->pad, &up->pad) ||
- put_user(kp->start_block, &up->start_block) ||
- put_user(kp->blocks, &up->blocks) ||
- put_user(tmp, &up->edid) ||
- copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved)))
- return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
+ assign_in_user(&up->pad, &kp->pad) ||
+ assign_in_user(&up->start_block, &kp->start_block) ||
+ assign_in_user(&up->blocks, &kp->blocks) ||
+ get_user(edid, &kp->edid) ||
+ put_user(ptr_to_compat(edid), &up->edid) ||
+ copy_in_user(up->reserved, kp->reserved, sizeof(up->reserved)))
+ return -EFAULT;
return 0;
}
@@ -830,7 +979,7 @@
#define VIDIOC_ENUMINPUT32 _IOWR('V', 26, struct v4l2_input32)
#define VIDIOC_G_EDID32 _IOWR('V', 40, struct v4l2_edid32)
#define VIDIOC_S_EDID32 _IOWR('V', 41, struct v4l2_edid32)
-#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32)
+#define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32)
#define VIDIOC_G_EXT_CTRLS32 _IOWR('V', 71, struct v4l2_ext_controls32)
#define VIDIOC_S_EXT_CTRLS32 _IOWR('V', 72, struct v4l2_ext_controls32)
#define VIDIOC_TRY_EXT_CTRLS32 _IOWR('V', 73, struct v4l2_ext_controls32)
@@ -846,22 +995,23 @@
#define VIDIOC_G_OUTPUT32 _IOR ('V', 46, s32)
#define VIDIOC_S_OUTPUT32 _IOWR('V', 47, s32)
+static int alloc_userspace(unsigned int size, u32 aux_space,
+ void __user **up_native)
+{
+ *up_native = compat_alloc_user_space(size + aux_space);
+ if (!*up_native)
+ return -ENOMEM;
+ if (clear_user(*up_native, size))
+ return -EFAULT;
+ return 0;
+}
+
static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- union {
- struct v4l2_format v2f;
- struct v4l2_buffer v2b;
- struct v4l2_framebuffer v2fb;
- struct v4l2_input v2i;
- struct v4l2_standard v2s;
- struct v4l2_ext_controls v2ecs;
- struct v4l2_event v2ev;
- struct v4l2_create_buffers v2crt;
- struct v4l2_edid v2edid;
- unsigned long vx;
- int vi;
- } karg;
void __user *up = compat_ptr(arg);
+ void __user *up_native = NULL;
+ void __user *aux_buf;
+ u32 aux_space;
int compatible_arg = 1;
long err = 0;
@@ -900,30 +1050,52 @@
case VIDIOC_STREAMOFF:
case VIDIOC_S_INPUT:
case VIDIOC_S_OUTPUT:
- err = get_user(karg.vi, (s32 __user *)up);
+ err = alloc_userspace(sizeof(unsigned int), 0, &up_native);
+ if (!err && assign_in_user((unsigned int __user *)up_native,
+ (compat_uint_t __user *)up))
+ err = -EFAULT;
compatible_arg = 0;
break;
case VIDIOC_G_INPUT:
case VIDIOC_G_OUTPUT:
+ err = alloc_userspace(sizeof(unsigned int), 0, &up_native);
compatible_arg = 0;
break;
case VIDIOC_G_EDID:
case VIDIOC_S_EDID:
- err = get_v4l2_edid32(&karg.v2edid, up);
+ err = alloc_userspace(sizeof(struct v4l2_edid), 0, &up_native);
+ if (!err)
+ err = get_v4l2_edid32(up_native, up);
compatible_arg = 0;
break;
case VIDIOC_G_FMT:
case VIDIOC_S_FMT:
case VIDIOC_TRY_FMT:
- err = get_v4l2_format32(&karg.v2f, up);
+ err = bufsize_v4l2_format(up, &aux_space);
+ if (!err)
+ err = alloc_userspace(sizeof(struct v4l2_format),
+ aux_space, &up_native);
+ if (!err) {
+ aux_buf = up_native + sizeof(struct v4l2_format);
+ err = get_v4l2_format32(up_native, up,
+ aux_buf, aux_space);
+ }
compatible_arg = 0;
break;
case VIDIOC_CREATE_BUFS:
- err = get_v4l2_create32(&karg.v2crt, up);
+ err = bufsize_v4l2_create(up, &aux_space);
+ if (!err)
+ err = alloc_userspace(sizeof(struct v4l2_create_buffers),
+ aux_space, &up_native);
+ if (!err) {
+ aux_buf = up_native + sizeof(struct v4l2_create_buffers);
+ err = get_v4l2_create32(up_native, up,
+ aux_buf, aux_space);
+ }
compatible_arg = 0;
break;
@@ -931,36 +1103,63 @@
case VIDIOC_QUERYBUF:
case VIDIOC_QBUF:
case VIDIOC_DQBUF:
- err = get_v4l2_buffer32(&karg.v2b, up);
+ err = bufsize_v4l2_buffer(up, &aux_space);
+ if (!err)
+ err = alloc_userspace(sizeof(struct v4l2_buffer),
+ aux_space, &up_native);
+ if (!err) {
+ aux_buf = up_native + sizeof(struct v4l2_buffer);
+ err = get_v4l2_buffer32(up_native, up,
+ aux_buf, aux_space);
+ }
compatible_arg = 0;
break;
case VIDIOC_S_FBUF:
- err = get_v4l2_framebuffer32(&karg.v2fb, up);
+ err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0,
+ &up_native);
+ if (!err)
+ err = get_v4l2_framebuffer32(up_native, up);
compatible_arg = 0;
break;
case VIDIOC_G_FBUF:
+ err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0,
+ &up_native);
compatible_arg = 0;
break;
case VIDIOC_ENUMSTD:
- err = get_v4l2_standard32(&karg.v2s, up);
+ err = alloc_userspace(sizeof(struct v4l2_standard), 0,
+ &up_native);
+ if (!err)
+ err = get_v4l2_standard32(up_native, up);
compatible_arg = 0;
break;
case VIDIOC_ENUMINPUT:
- err = get_v4l2_input32(&karg.v2i, up);
+ err = alloc_userspace(sizeof(struct v4l2_input), 0, &up_native);
+ if (!err)
+ err = get_v4l2_input32(up_native, up);
compatible_arg = 0;
break;
case VIDIOC_G_EXT_CTRLS:
case VIDIOC_S_EXT_CTRLS:
case VIDIOC_TRY_EXT_CTRLS:
- err = get_v4l2_ext_controls32(&karg.v2ecs, up);
+ err = bufsize_v4l2_ext_controls(up, &aux_space);
+ if (!err)
+ err = alloc_userspace(sizeof(struct v4l2_ext_controls),
+ aux_space, &up_native);
+ if (!err) {
+ aux_buf = up_native + sizeof(struct v4l2_ext_controls);
+ err = get_v4l2_ext_controls32(file, up_native, up,
+ aux_buf, aux_space);
+ }
compatible_arg = 0;
break;
case VIDIOC_DQEVENT:
+ err = alloc_userspace(sizeof(struct v4l2_event), 0, &up_native);
compatible_arg = 0;
break;
}
@@ -969,22 +1168,26 @@
if (compatible_arg)
err = native_ioctl(file, cmd, (unsigned long)up);
- else {
- mm_segment_t old_fs = get_fs();
+ else
+ err = native_ioctl(file, cmd, (unsigned long)up_native);
- set_fs(KERNEL_DS);
- err = native_ioctl(file, cmd, (unsigned long)&karg);
- set_fs(old_fs);
- }
+ if (err == -ENOTTY)
+ return err;
- /* Special case: even after an error we need to put the
- results back for these ioctls since the error_idx will
- contain information on which control failed. */
+ /*
+ * Special case: even after an error we need to put the
+ * results back for these ioctls since the error_idx will
+ * contain information on which control failed.
+ */
switch (cmd) {
case VIDIOC_G_EXT_CTRLS:
case VIDIOC_S_EXT_CTRLS:
case VIDIOC_TRY_EXT_CTRLS:
- if (put_v4l2_ext_controls32(&karg.v2ecs, up))
+ if (put_v4l2_ext_controls32(file, up_native, up))
+ err = -EFAULT;
+ break;
+ case VIDIOC_S_EDID:
+ if (put_v4l2_edid32(up_native, up))
err = -EFAULT;
break;
}
@@ -996,44 +1199,46 @@
case VIDIOC_S_OUTPUT:
case VIDIOC_G_INPUT:
case VIDIOC_G_OUTPUT:
- err = put_user(((s32)karg.vi), (s32 __user *)up);
+ if (assign_in_user((compat_uint_t __user *)up,
+ ((unsigned int __user *)up_native)))
+ err = -EFAULT;
break;
case VIDIOC_G_FBUF:
- err = put_v4l2_framebuffer32(&karg.v2fb, up);
+ err = put_v4l2_framebuffer32(up_native, up);
break;
case VIDIOC_DQEVENT:
- err = put_v4l2_event32(&karg.v2ev, up);
+ err = put_v4l2_event32(up_native, up);
break;
case VIDIOC_G_EDID:
- case VIDIOC_S_EDID:
- err = put_v4l2_edid32(&karg.v2edid, up);
+ err = put_v4l2_edid32(up_native, up);
break;
case VIDIOC_G_FMT:
case VIDIOC_S_FMT:
case VIDIOC_TRY_FMT:
- err = put_v4l2_format32(&karg.v2f, up);
+ err = put_v4l2_format32(up_native, up);
break;
case VIDIOC_CREATE_BUFS:
- err = put_v4l2_create32(&karg.v2crt, up);
+ err = put_v4l2_create32(up_native, up);
break;
+ case VIDIOC_PREPARE_BUF:
case VIDIOC_QUERYBUF:
case VIDIOC_QBUF:
case VIDIOC_DQBUF:
- err = put_v4l2_buffer32(&karg.v2b, up);
+ err = put_v4l2_buffer32(up_native, up);
break;
case VIDIOC_ENUMSTD:
- err = put_v4l2_standard32(&karg.v2s, up);
+ err = put_v4l2_standard32(up_native, up);
break;
case VIDIOC_ENUMINPUT:
- err = put_v4l2_input32(&karg.v2i, up);
+ err = put_v4l2_input32(up_native, up);
break;
}
return err;
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index c52d94c..4510e8a 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2862,8 +2862,11 @@
/* Handles IOCTL */
err = func(file, cmd, parg);
- if (err == -ENOIOCTLCMD)
+ if (err == -ENOTTY || err == -ENOIOCTLCMD) {
err = -ENOTTY;
+ goto out;
+ }
+
if (err == 0) {
if (cmd == VIDIOC_DQBUF)
trace_v4l2_dqbuf(video_devdata(file)->minor, parg);
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 5457c36..bf0fe01 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -1947,9 +1947,7 @@
if (!of_property_read_u32(child, "dma-channel", &val))
gpmc_onenand_data->dma_channel = val;
- gpmc_onenand_init(gpmc_onenand_data);
-
- return 0;
+ return gpmc_onenand_init(gpmc_onenand_data);
}
#else
static int gpmc_probe_onenand_child(struct platform_device *pdev,
diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c
index a518832..59dbdaa 100644
--- a/drivers/mfd/cros_ec_spi.c
+++ b/drivers/mfd/cros_ec_spi.c
@@ -664,6 +664,7 @@
sizeof(struct ec_response_get_protocol_info);
ec_dev->dout_size = sizeof(struct ec_host_request);
+ ec_spi->last_transfer_ns = ktime_get_ns();
err = cros_ec_register(ec_dev);
if (err) {
diff --git a/drivers/mfd/fsl-imx25-tsadc.c b/drivers/mfd/fsl-imx25-tsadc.c
index 77b2675..92e1760 100644
--- a/drivers/mfd/fsl-imx25-tsadc.c
+++ b/drivers/mfd/fsl-imx25-tsadc.c
@@ -183,6 +183,19 @@
return 0;
}
+static int mx25_tsadc_remove(struct platform_device *pdev)
+{
+ struct mx25_tsadc *tsadc = platform_get_drvdata(pdev);
+ int irq = platform_get_irq(pdev, 0);
+
+ if (irq) {
+ irq_set_chained_handler_and_data(irq, NULL, NULL);
+ irq_domain_remove(tsadc->domain);
+ }
+
+ return 0;
+}
+
static const struct of_device_id mx25_tsadc_ids[] = {
{ .compatible = "fsl,imx25-tsadc" },
{ /* Sentinel */ }
@@ -194,6 +207,7 @@
.of_match_table = of_match_ptr(mx25_tsadc_ids),
},
.probe = mx25_tsadc_probe,
+ .remove = mx25_tsadc_remove,
};
module_platform_driver(mx25_tsadc_driver);
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index 0a16064..cc832d3 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -159,13 +159,18 @@
EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
- struct device_node *node)
+ struct device_node *parent)
{
+ struct device_node *node;
+
if (pdata && pdata->codec)
return true;
- if (of_find_node_by_name(node, "codec"))
+ node = of_get_child_by_name(parent, "codec");
+ if (node) {
+ of_node_put(node);
return true;
+ }
return false;
}
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 1beb722..e1e69a4 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -701,6 +701,7 @@
TWL_RESOURCE_RESET(RES_MAIN_REF),
TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2),
TWL_RESOURCE_RESET(RES_VUSB_3V1),
+ TWL_RESOURCE_RESET(RES_VMMC1),
TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1),
TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0),
TWL_RESOURCE_ON(RES_RESET),
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index d66502d..dd19f17 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -97,12 +97,16 @@
};
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
{
-#ifdef CONFIG_OF
- if (of_find_node_by_name(node, "vibra"))
+ struct device_node *node;
+
+ node = of_get_child_by_name(parent, "vibra");
+ if (node) {
+ of_node_put(node);
return true;
-#endif
+ }
+
return false;
}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index eef202d..a5422f4 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1758,6 +1758,9 @@
/* There should only be one entry, but go through the list
* anyway
*/
+ if (afu->phb == NULL)
+ return result;
+
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
if (!afu_dev->driver)
continue;
@@ -1801,6 +1804,11 @@
/* Only participate in EEH if we are on a virtual PHB */
if (afu->phb == NULL)
return PCI_ERS_RESULT_NONE;
+
+ /*
+ * Tell the AFU drivers; but we don't care what they
+ * say, we're going away.
+ */
cxl_vphb_error_detected(afu, state);
}
return PCI_ERS_RESULT_DISCONNECT;
@@ -1941,6 +1949,9 @@
if (cxl_afu_select_best_mode(afu))
goto err;
+ if (afu->phb == NULL)
+ continue;
+
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
/* Reset the device context.
* TODO: make this less disruptive
@@ -2003,6 +2014,9 @@
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
+ if (afu->phb == NULL)
+ continue;
+
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
if (afu_dev->driver && afu_dev->driver->err_handler &&
afu_dev->driver->err_handler->resume)
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 051b147..d8a485f 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -365,7 +365,8 @@
memset(msg, 0, sizeof(msg));
msg[0].addr = client->addr;
msg[0].buf = addrbuf;
- addrbuf[0] = 0x90 + offset;
+ /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
+ addrbuf[0] = 0xa0 - at24->chip.byte_len + offset;
msg[0].len = 1;
msg[1].addr = client->addr;
msg[1].flags = I2C_M_RD;
@@ -506,6 +507,9 @@
if (unlikely(!count))
return count;
+ if (off + count > at24->chip.byte_len)
+ return -EINVAL;
+
/*
* Read data from chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
@@ -538,6 +542,9 @@
if (unlikely(!count))
return -EINVAL;
+ if (off + count > at24->chip.byte_len)
+ return -EINVAL;
+
/*
* Write data to chip, protecting against concurrent updates
* from this host, but not from other I2C masters.
@@ -638,6 +645,16 @@
dev_warn(&client->dev,
"page_size looks suspicious (no power of 2)!\n");
+ /*
+ * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while
+ * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4.
+ *
+ * Eventually we'll get rid of the magic values altoghether in favor of
+ * real structs, but for now just manually set the right size.
+ */
+ if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4)
+ chip.byte_len = 6;
+
/* Use I2C operations unless we're stuck with SMBus extensions. */
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
if (chip.flags & AT24_FLAG_ADDR16)
@@ -766,7 +783,7 @@
at24->nvmem_config.reg_read = at24_read;
at24->nvmem_config.reg_write = at24_write;
at24->nvmem_config.priv = at24;
- at24->nvmem_config.stride = 4;
+ at24->nvmem_config.stride = 1;
at24->nvmem_config.word_size = 1;
at24->nvmem_config.size = chip.byte_len;
diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c
index 7d69dd5..42a513c 100644
--- a/drivers/misc/uid_sys_stats.c
+++ b/drivers/misc/uid_sys_stats.c
@@ -1,4 +1,4 @@
-/* drivers/misc/uid_cputime.c
+/* drivers/misc/uid_sys_stats.c
*
* Copyright (C) 2014 - 2015 Google, Inc.
*
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index c64266f..60ebe5b 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -155,6 +155,9 @@
return ret;
ret = host->bus_ops->suspend(host);
+ if (ret)
+ pm_generic_resume(dev);
+
return ret;
}
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 40ddc3e..00b9d7e 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -209,9 +209,10 @@
completion = ktime_get();
delta_us = ktime_us_delta(completion,
mrq->io_start);
- blk_update_latency_hist(&host->io_lat_s,
- (mrq->data->flags & MMC_DATA_READ),
- delta_us);
+ blk_update_latency_hist(
+ (mrq->data->flags & MMC_DATA_READ) ?
+ &host->io_lat_read :
+ &host->io_lat_write, delta_us);
}
#endif
}
@@ -3100,8 +3101,14 @@
latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct mmc_host *host = cls_dev_to_mmc_host(dev);
+ size_t written_bytes;
- return blk_latency_hist_show(&host->io_lat_s, buf);
+ written_bytes = blk_latency_hist_show("Read", &host->io_lat_read,
+ buf, PAGE_SIZE);
+ written_bytes += blk_latency_hist_show("Write", &host->io_lat_write,
+ buf + written_bytes, PAGE_SIZE - written_bytes);
+
+ return written_bytes;
}
/*
@@ -3119,9 +3126,10 @@
if (kstrtol(buf, 0, &value))
return -EINVAL;
- if (value == BLK_IO_LAT_HIST_ZERO)
- blk_zero_latency_hist(&host->io_lat_s);
- else if (value == BLK_IO_LAT_HIST_ENABLE ||
+ if (value == BLK_IO_LAT_HIST_ZERO) {
+ memset(&host->io_lat_read, 0, sizeof(host->io_lat_read));
+ memset(&host->io_lat_write, 0, sizeof(host->io_lat_write));
+ } else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
host->latency_hist_enabled = value;
return count;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index e0218b3..3e5954f 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -763,7 +763,7 @@
MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size);
MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult);
MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors);
-MMC_DEV_ATTR(ocr, "%08x\n", card->ocr);
+MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr);
static ssize_t mmc_fwrev_show(struct device *dev,
struct device_attribute *attr,
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index a0aa64e..ad70934 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -683,7 +683,7 @@
MMC_DEV_ATTR(name, "%s\n", card->cid.prod_name);
MMC_DEV_ATTR(oemid, "0x%04x\n", card->cid.oemid);
MMC_DEV_ATTR(serial, "0x%08x\n", card->cid.serial);
-MMC_DEV_ATTR(ocr, "%08x\n", card->ocr);
+MMC_DEV_ATTR(ocr, "0x%08x\n", card->ocr);
static ssize_t mmc_dsr_show(struct device *dev,
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 84e9afc..6f9535e 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -579,7 +579,7 @@
}
}
sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV,
- (mode << 8) | (div % 0xff));
+ (mode << 8) | div);
sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN);
while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB))
cpu_relax();
@@ -1562,7 +1562,7 @@
host->src_clk_freq = clk_get_rate(host->src_clk);
/* Set host parameters to mmc */
mmc->ops = &mt_msdc_ops;
- mmc->f_min = host->src_clk_freq / (4 * 255);
+ mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255);
mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
/* MMC core transfer sizes tunable parameters */
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 90ed2e1..80c89a3 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -642,6 +642,21 @@
CORE_VENDOR_SPEC_CAPABILITIES0);
}
+ /*
+ * Power on reset state may trigger power irq if previous status of
+ * PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq
+ * interrupt in GIC, any pending power irq interrupt should be
+ * acknowledged. Otherwise power irq interrupt handler would be
+ * fired prematurely.
+ */
+ sdhci_msm_voltage_switch(host);
+
+ /*
+ * Ensure that above writes are propogated before interrupt enablement
+ * in GIC.
+ */
+ mb();
+
/* Setup IRQ for handling power/voltage tasks with PMIC */
msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq");
if (msm_host->pwr_irq < 0) {
@@ -651,6 +666,9 @@
goto clk_disable;
}
+ /* Enable pwr irq interrupts */
+ writel_relaxed(INT_MASK, msm_host->core_mem + CORE_PWRCTL_MASK);
+
ret = devm_request_threaded_irq(&pdev->dev, msm_host->pwr_irq, NULL,
sdhci_msm_pwr_irq, IRQF_ONESHOT,
dev_name(&pdev->dev), host);
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index d9fab22..1a4a790 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -2193,16 +2193,9 @@
if (ctrl->nand_version >= 0x0702)
tmp |= ACC_CONTROL_RD_ERASED;
tmp &= ~ACC_CONTROL_FAST_PGM_RDIN;
- if (ctrl->features & BRCMNAND_HAS_PREFETCH) {
- /*
- * FIXME: Flash DMA + prefetch may see spurious erased-page ECC
- * errors
- */
- if (has_flash_dma(ctrl))
- tmp &= ~ACC_CONTROL_PREFETCH;
- else
- tmp |= ACC_CONTROL_PREFETCH;
- }
+ if (ctrl->features & BRCMNAND_HAS_PREFETCH)
+ tmp &= ~ACC_CONTROL_PREFETCH;
+
nand_writereg(ctrl, offs, tmp);
return 0;
diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
index de31514..d38527e 100644
--- a/drivers/mtd/nand/denali_pci.c
+++ b/drivers/mtd/nand/denali_pci.c
@@ -119,3 +119,7 @@
};
module_pci_driver(denali_pci_driver);
+
+MODULE_DESCRIPTION("PCI driver for Denali NAND controller");
+MODULE_AUTHOR("Intel Corporation and its suppliers");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index a77cfd7..21c0308 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2320,6 +2320,7 @@
static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
struct mtd_oob_ops *ops)
{
+ unsigned int max_bitflips = 0;
int page, realpage, chipnr;
struct nand_chip *chip = mtd_to_nand(mtd);
struct mtd_ecc_stats stats;
@@ -2377,6 +2378,8 @@
nand_wait_ready(mtd);
}
+ max_bitflips = max_t(unsigned int, max_bitflips, ret);
+
readlen -= len;
if (!readlen)
break;
@@ -2402,7 +2405,7 @@
if (mtd->ecc_stats.failed - stats.failed)
return -EBADMSG;
- return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0;
+ return max_bitflips;
}
/**
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index b121bf4..3b8911c 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -950,6 +950,7 @@
switch (command) {
case NAND_CMD_READ0:
+ case NAND_CMD_READOOB:
case NAND_CMD_PAGEPROG:
info->use_ecc = 1;
break;
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index f9b2a77..e26c4f8 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -1835,8 +1835,14 @@
/* Add ECC info retrieval from DT */
for (i = 0; i < ARRAY_SIZE(strengths); i++) {
- if (ecc->strength <= strengths[i])
+ if (ecc->strength <= strengths[i]) {
+ /*
+ * Update ecc->strength value with the actual strength
+ * that will be used by the ECC engine.
+ */
+ ecc->strength = strengths[i];
break;
+ }
}
if (i >= ARRAY_SIZE(strengths)) {
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index d1e6931..46913ef2 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -99,6 +99,8 @@
/* Linked list of all ubiblock instances */
static LIST_HEAD(ubiblock_devices);
+static DEFINE_IDR(ubiblock_minor_idr);
+/* Protects ubiblock_devices and ubiblock_minor_idr */
static DEFINE_MUTEX(devices_mutex);
static int ubiblock_major;
@@ -353,8 +355,6 @@
.init_request = ubiblock_init_request,
};
-static DEFINE_IDR(ubiblock_minor_idr);
-
int ubiblock_create(struct ubi_volume_info *vi)
{
struct ubiblock *dev;
@@ -367,14 +367,15 @@
/* Check that the volume isn't already handled */
mutex_lock(&devices_mutex);
if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
- mutex_unlock(&devices_mutex);
- return -EEXIST;
+ ret = -EEXIST;
+ goto out_unlock;
}
- mutex_unlock(&devices_mutex);
dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
+ if (!dev) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
mutex_init(&dev->dev_mutex);
@@ -439,14 +440,13 @@
goto out_free_queue;
}
- mutex_lock(&devices_mutex);
list_add_tail(&dev->list, &ubiblock_devices);
- mutex_unlock(&devices_mutex);
/* Must be the last step: anyone can call file ops from now on */
add_disk(dev->gd);
dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)",
dev->ubi_num, dev->vol_id, vi->name);
+ mutex_unlock(&devices_mutex);
return 0;
out_free_queue:
@@ -459,6 +459,8 @@
put_disk(dev->gd);
out_free_dev:
kfree(dev);
+out_unlock:
+ mutex_unlock(&devices_mutex);
return ret;
}
@@ -480,30 +482,36 @@
int ubiblock_remove(struct ubi_volume_info *vi)
{
struct ubiblock *dev;
+ int ret;
mutex_lock(&devices_mutex);
dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
if (!dev) {
- mutex_unlock(&devices_mutex);
- return -ENODEV;
+ ret = -ENODEV;
+ goto out_unlock;
}
/* Found a device, let's lock it so we can check if it's busy */
mutex_lock(&dev->dev_mutex);
if (dev->refcnt > 0) {
- mutex_unlock(&dev->dev_mutex);
- mutex_unlock(&devices_mutex);
- return -EBUSY;
+ ret = -EBUSY;
+ goto out_unlock_dev;
}
/* Remove from device list */
list_del(&dev->list);
- mutex_unlock(&devices_mutex);
-
ubiblock_cleanup(dev);
mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&devices_mutex);
+
kfree(dev);
return 0;
+
+out_unlock_dev:
+ mutex_unlock(&dev->dev_mutex);
+out_unlock:
+ mutex_unlock(&devices_mutex);
+ return ret;
}
static int ubiblock_resize(struct ubi_volume_info *vi)
@@ -632,6 +640,7 @@
struct ubiblock *next;
struct ubiblock *dev;
+ mutex_lock(&devices_mutex);
list_for_each_entry_safe(dev, next, &ubiblock_devices, list) {
/* The module is being forcefully removed */
WARN_ON(dev->desc);
@@ -640,6 +649,7 @@
ubiblock_cleanup(dev);
kfree(dev);
}
+ mutex_unlock(&devices_mutex);
}
int __init ubiblock_init(void)
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index b5b8cd6..668b462 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1529,6 +1529,46 @@
}
/**
+ * erase_aeb - erase a PEB given in UBI attach info PEB
+ * @ubi: UBI device description object
+ * @aeb: UBI attach info PEB
+ * @sync: If true, erase synchronously. Otherwise schedule for erasure
+ */
+static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync)
+{
+ struct ubi_wl_entry *e;
+ int err;
+
+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ e->pnum = aeb->pnum;
+ e->ec = aeb->ec;
+ ubi->lookuptbl[e->pnum] = e;
+
+ if (sync) {
+ err = sync_erase(ubi, e, false);
+ if (err)
+ goto out_free;
+
+ wl_tree_add(e, &ubi->free);
+ ubi->free_count++;
+ } else {
+ err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false);
+ if (err)
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ wl_entry_destroy(ubi, e);
+
+ return err;
+}
+
+/**
* ubi_wl_init - initialize the WL sub-system using attaching information.
* @ubi: UBI device description object
* @ai: attaching information
@@ -1566,18 +1606,10 @@
list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) {
cond_resched();
- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
- if (!e)
+ err = erase_aeb(ubi, aeb, false);
+ if (err)
goto out_free;
- e->pnum = aeb->pnum;
- e->ec = aeb->ec;
- ubi->lookuptbl[e->pnum] = e;
- if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
- wl_entry_destroy(ubi, e);
- goto out_free;
- }
-
found_pebs++;
}
@@ -1635,6 +1667,8 @@
ubi_assert(!ubi->lookuptbl[e->pnum]);
ubi->lookuptbl[e->pnum] = e;
} else {
+ bool sync = false;
+
/*
* Usually old Fastmap PEBs are scheduled for erasure
* and we don't have to care about them but if we face
@@ -1644,18 +1678,21 @@
if (ubi->lookuptbl[aeb->pnum])
continue;
- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
- if (!e)
- goto out_free;
+ /*
+ * The fastmap update code might not find a free PEB for
+ * writing the fastmap anchor to and then reuses the
+ * current fastmap anchor PEB. When this PEB gets erased
+ * and a power cut happens before it is written again we
+ * must make sure that the fastmap attach code doesn't
+ * find any outdated fastmap anchors, hence we erase the
+ * outdated fastmap anchor PEBs synchronously here.
+ */
+ if (aeb->vol_id == UBI_FM_SB_VOLUME_ID)
+ sync = true;
- e->pnum = aeb->pnum;
- e->ec = aeb->ec;
- ubi_assert(!ubi->lookuptbl[e->pnum]);
- ubi->lookuptbl[e->pnum] = e;
- if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) {
- wl_entry_destroy(ubi, e);
+ err = erase_aeb(ubi, aeb, sync);
+ if (err)
goto out_free;
- }
}
found_pebs++;
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index e90c6a7..2e46496 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -191,7 +191,7 @@
*/
static int ipddp_create(struct ipddp_route *new_rt)
{
- struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL);
+ struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL);
if (rt == NULL)
return -ENOMEM;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 6749b18..4d01d7b 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -652,6 +652,9 @@
mbx_mask = hecc_read(priv, HECC_CANMIM);
mbx_mask |= HECC_TX_MBOX_MASK;
hecc_write(priv, HECC_CANMIM, mbx_mask);
+ } else {
+ /* repoll is done only if whole budget is used */
+ num_pkts = quota;
}
return num_pkts;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index b3d0275..b003582 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -288,6 +288,8 @@
case -ECONNRESET: /* unlink */
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 9fdb0f0..c6dcf93 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -393,6 +393,8 @@
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index eea9aea..5d50123 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -449,7 +449,7 @@
dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
rc);
- return rc;
+ return (rc > 0) ? 0 : rc;
}
static void gs_usb_xmit_callback(struct urb *urb)
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 4224e06..c9d61a6 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -609,8 +609,8 @@
}
if (pos + tmp->len > actual_len) {
- dev_err(dev->udev->dev.parent,
- "Format error\n");
+ dev_err_ratelimited(dev->udev->dev.parent,
+ "Format error\n");
break;
}
@@ -813,6 +813,7 @@
if (err) {
netdev_err(netdev, "Error transmitting URB\n");
usb_unanchor_urb(urb);
+ kfree(buf);
usb_free_urb(urb);
return err;
}
@@ -1325,6 +1326,8 @@
case 0:
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
default:
@@ -1333,7 +1336,7 @@
goto resubmit_urb;
}
- while (pos <= urb->actual_length - MSG_HEADER_LEN) {
+ while (pos <= (int)(urb->actual_length - MSG_HEADER_LEN)) {
msg = urb->transfer_buffer + pos;
/* The Kvaser firmware can only read and write messages that
@@ -1352,7 +1355,8 @@
}
if (pos + msg->len > urb->actual_length) {
- dev_err(dev->udev->dev.parent, "Format error\n");
+ dev_err_ratelimited(dev->udev->dev.parent,
+ "Format error\n");
break;
}
@@ -1768,6 +1772,7 @@
spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
usb_unanchor_urb(urb);
+ kfree(buf);
stats->tx_dropped++;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 3047325..7f5ec40 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -184,7 +184,7 @@
void *cmd_head = pcan_usb_fd_cmd_buffer(dev);
int err = 0;
u8 *packet_ptr;
- int i, n = 1, packet_len;
+ int packet_len;
ptrdiff_t cmd_len;
/* usb device unregistered? */
@@ -201,17 +201,13 @@
}
packet_ptr = cmd_head;
+ packet_len = cmd_len;
/* firmware is not able to re-assemble 512 bytes buffer in full-speed */
- if ((dev->udev->speed != USB_SPEED_HIGH) &&
- (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) {
- packet_len = PCAN_UFD_LOSPD_PKT_SIZE;
- n += cmd_len / packet_len;
- } else {
- packet_len = cmd_len;
- }
+ if (unlikely(dev->udev->speed != USB_SPEED_HIGH))
+ packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE);
- for (i = 0; i < n; i++) {
+ do {
err = usb_bulk_msg(dev->udev,
usb_sndbulkpipe(dev->udev,
PCAN_USBPRO_EP_CMDOUT),
@@ -224,7 +220,12 @@
}
packet_ptr += packet_len;
- }
+ cmd_len -= packet_len;
+
+ if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE)
+ packet_len = cmd_len;
+
+ } while (packet_len > 0);
return err;
}
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index d000cb6..27861c4 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -524,6 +524,8 @@
break;
case -ENOENT:
+ case -EPIPE:
+ case -EPROTO:
case -ESHUTDOWN:
return;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index be7ec5a..744ed6d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1023,6 +1023,18 @@
goto out;
}
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 64 bytes (including FCS) otherwise they will be discarded when
+ * they enter the switch port logic. When Broadcom tags are enabled, we
+ * need to make sure that packets are at least 68 bytes
+ * (including FCS and tag) because the length verification is done after
+ * the Broadcom tag is stripped off the ingress packet.
+ */
+ if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
+ ret = NETDEV_TX_OK;
+ goto out;
+ }
+
/* Insert TSB and checksum infos */
if (priv->tsb_en) {
skb = bcm_sysport_insert_tsb(skb, dev);
@@ -1032,20 +1044,7 @@
}
}
- /* The Ethernet switch we are interfaced with needs packets to be at
- * least 64 bytes (including FCS) otherwise they will be discarded when
- * they enter the switch port logic. When Broadcom tags are enabled, we
- * need to make sure that packets are at least 68 bytes
- * (including FCS and tag) because the length verification is done after
- * the Broadcom tag is stripped off the ingress packet.
- */
- if (skb_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
- ret = NETDEV_TX_OK;
- goto out;
- }
-
- skb_len = skb->len < ETH_ZLEN + ENET_BRCM_TAG_LEN ?
- ETH_ZLEN + ENET_BRCM_TAG_LEN : skb->len;
+ skb_len = skb->len;
mapping = dma_map_single(kdev, skb->data, skb_len, DMA_TO_DEVICE);
if (dma_mapping_error(kdev, mapping)) {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 4febe60..5d958b5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13293,17 +13293,15 @@
dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
- /* VF with OLD Hypervisor or old PF do not support filtering */
if (IS_PF(bp)) {
if (chip_is_e1x)
bp->accept_any_vlan = true;
else
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
- } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
}
+ /* For VF we'll know whether to enable VLAN filtering after
+ * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+ */
dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
dev->features |= NETIF_F_HIGHDMA;
@@ -13735,7 +13733,7 @@
if (!netif_running(bp->dev)) {
DP(BNX2X_MSG_PTP,
"PTP adjfreq called while the interface is down\n");
- return -EFAULT;
+ return -ENETDOWN;
}
if (ppb < 0) {
@@ -13794,6 +13792,12 @@
{
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP adjtime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
timecounter_adjtime(&bp->timecounter, delta);
@@ -13806,6 +13810,12 @@
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP gettime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timecounter_read(&bp->timecounter);
DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13821,6 +13831,12 @@
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP settime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timespec64_to_ns(ts);
DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
@@ -13988,6 +14004,14 @@
rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
if (rc)
goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+ /* VF with OLD Hypervisor or old PF do not support filtering */
+ if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+#endif
}
/* Enable SRIOV if capability found in configuration space */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 3f77d08..c6e0591 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -434,7 +434,9 @@
/* Add/Remove the filter */
rc = bnx2x_config_vlan_mac(bp, &ramrod);
- if (rc && rc != -EEXIST) {
+ if (rc == -EEXIST)
+ return 0;
+ if (rc) {
BNX2X_ERR("Failed to %s %s\n",
filter->add ? "add" : "delete",
(filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@
return rc;
}
+ filter->applied = true;
+
return 0;
}
@@ -471,6 +475,8 @@
BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
i, filters->count + 1);
while (--i >= 0) {
+ if (!filters->filters[i].applied)
+ continue;
filters->filters[i].add = !filters->filters[i].add;
bnx2x_vf_mac_vlan_config(bp, vf, qid,
&filters->filters[i],
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 7a6d406..888d0b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -114,6 +114,7 @@
(BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
bool add;
+ bool applied;
u8 *mac;
u16 vid;
};
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index bfae300..c2d327d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -868,7 +868,7 @@
struct bnx2x *bp = netdev_priv(dev);
struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
- int rc, i = 0;
+ int rc = 0, i = 0;
struct netdev_hw_addr *ha;
if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@
/* Get Rx mode requested */
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+ /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+ if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+ DP(NETIF_MSG_IFUP,
+ "VF supports not more than %d multicast MAC addresses\n",
+ PFVF_MAX_MULTICAST_PER_VF);
+ rc = -EINVAL;
+ goto out;
+ }
+
netdev_for_each_mc_addr(ha, dev) {
DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@
i++;
}
- /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
- * addresses tops
- */
- if (i >= PFVF_MAX_MULTICAST_PER_VF) {
- DP(NETIF_MSG_IFUP,
- "VF supports not more than %d multicast MAC addresses\n",
- PFVF_MAX_MULTICAST_PER_VF);
- return -EINVAL;
- }
-
req->n_multicast = i;
req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
req->vf_qid = 0;
@@ -924,7 +923,7 @@
out:
bnx2x_vfpf_finalize(bp, &req->first_tlv);
- return 0;
+ return rc;
}
/* request pf to add a vlan for the vf */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5d2cf56..bbb3641 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2381,6 +2381,18 @@
return 0;
}
+static void bnxt_init_cp_rings(struct bnxt *bp)
+{
+ int i;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+ struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+
+ ring->fw_ring_id = INVALID_HW_RING_ID;
+ }
+}
+
static int bnxt_init_rx_rings(struct bnxt *bp)
{
int i, rc = 0;
@@ -4700,6 +4712,7 @@
static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
{
+ bnxt_init_cp_rings(bp);
bnxt_init_rx_rings(bp);
bnxt_init_tx_rings(bp);
bnxt_init_ring_grps(bp, irq_re_init);
@@ -5132,8 +5145,9 @@
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
}
- link_info->support_auto_speeds =
- le16_to_cpu(resp->supported_speeds_auto_mode);
+ if (resp->supported_speeds_auto_mode)
+ link_info->support_auto_speeds =
+ le16_to_cpu(resp->supported_speeds_auto_mode);
hwrm_phy_qcaps_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a7e04ff..cde4b96 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1843,8 +1843,8 @@
/* Read A2 portion of the EEPROM */
if (length) {
start -= ETH_MODULE_SFF_8436_LEN;
- bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, start,
- length, data);
+ rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1,
+ start, length, data);
}
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0975af2..3480b30 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1,7 +1,7 @@
/*
* Broadcom GENET (Gigabit Ethernet) controller driver
*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -778,8 +778,9 @@
STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
/* Misc UniMAC counters */
STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
- UMAC_RBUF_OVFL_CNT),
- STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+ UMAC_RBUF_OVFL_CNT_V1),
+ STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+ UMAC_RBUF_ERR_CNT_V1),
STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +822,45 @@
}
}
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+ u16 new_offset;
+ u32 val;
+
+ switch (offset) {
+ case UMAC_RBUF_OVFL_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_OVFL_CNT_V2;
+ else
+ new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ case UMAC_RBUF_ERR_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_ERR_CNT_V2;
+ else
+ new_offset = RBUF_ERR_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ default:
+ val = bcmgenet_umac_readl(priv, offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0, offset);
+ break;
+ }
+
+ return val;
+}
+
static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
{
int i, j = 0;
@@ -836,19 +876,28 @@
case BCMGENET_STAT_NETDEV:
case BCMGENET_STAT_SOFT:
continue;
- case BCMGENET_STAT_MIB_RX:
- case BCMGENET_STAT_MIB_TX:
case BCMGENET_STAT_RUNT:
- if (s->type != BCMGENET_STAT_MIB_RX)
- offset = BCMGENET_STAT_OFFSET;
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_TX:
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_RX:
val = bcmgenet_umac_readl(priv,
UMAC_MIB_START + j + offset);
+ offset = 0; /* Reset Offset */
break;
case BCMGENET_STAT_MISC:
- val = bcmgenet_umac_readl(priv, s->reg_offset);
- /* clear if overflowed */
- if (val == ~0)
- bcmgenet_umac_writel(priv, 0, s->reg_offset);
+ if (GENET_IS_V1(priv)) {
+ val = bcmgenet_umac_readl(priv, s->reg_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0,
+ s->reg_offset);
+ } else {
+ val = bcmgenet_update_stat_misc(priv,
+ s->reg_offset);
+ }
break;
}
@@ -2464,24 +2513,28 @@
/* Interrupt bottom half */
static void bcmgenet_irq_task(struct work_struct *work)
{
+ unsigned long flags;
+ unsigned int status;
struct bcmgenet_priv *priv = container_of(
work, struct bcmgenet_priv, bcmgenet_irq_work);
netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
- if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
- priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+ spin_lock_irqsave(&priv->lock, flags);
+ status = priv->irq0_stat;
+ priv->irq0_stat = 0;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (status & UMAC_IRQ_MPD_R) {
netif_dbg(priv, wol, priv->dev,
"magic packet detected, waking up\n");
bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
}
/* Link UP/DOWN event */
- if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+ if (status & UMAC_IRQ_LINK_EVENT)
phy_mac_interrupt(priv->phydev,
- !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
- priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
- }
+ !!(status & UMAC_IRQ_LINK_UP));
}
/* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2490,22 +2543,21 @@
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
- unsigned int index;
+ unsigned int index, status;
- /* Save irq status for bottom-half processing. */
- priv->irq1_stat =
- bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+ "%s: IRQ=0x%x\n", __func__, status);
/* Check Rx priority queue interrupts */
for (index = 0; index < priv->hw_params->rx_queues; index++) {
- if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+ if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
continue;
rx_ring = &priv->rx_rings[index];
@@ -2518,7 +2570,7 @@
/* Check Tx priority queue interrupts */
for (index = 0; index < priv->hw_params->tx_queues; index++) {
- if (!(priv->irq1_stat & BIT(index)))
+ if (!(status & BIT(index)))
continue;
tx_ring = &priv->tx_rings[index];
@@ -2538,19 +2590,20 @@
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
+ unsigned int status;
+ unsigned long flags;
- /* Save irq status for bottom-half processing. */
- priv->irq0_stat =
- bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "IRQ=0x%x\n", priv->irq0_stat);
+ "IRQ=0x%x\n", status);
- if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+ if (status & UMAC_IRQ_RXDMA_DONE) {
rx_ring = &priv->rx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&rx_ring->napi))) {
@@ -2559,7 +2612,7 @@
}
}
- if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+ if (status & UMAC_IRQ_TXDMA_DONE) {
tx_ring = &priv->tx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&tx_ring->napi))) {
@@ -2568,20 +2621,21 @@
}
}
- if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
- UMAC_IRQ_PHY_DET_F |
- UMAC_IRQ_LINK_EVENT |
- UMAC_IRQ_HFB_SM |
- UMAC_IRQ_HFB_MM |
- UMAC_IRQ_MPD_R)) {
- /* all other interested interrupts handled in bottom half */
- schedule_work(&priv->bcmgenet_irq_work);
+ if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
+ status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
+ wake_up(&priv->wq);
}
- if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
- priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
- priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
- wake_up(&priv->wq);
+ /* all other interested interrupts handled in bottom half */
+ status &= (UMAC_IRQ_LINK_EVENT |
+ UMAC_IRQ_MPD_R);
+ if (status) {
+ /* Save irq status for bottom-half processing. */
+ spin_lock_irqsave(&priv->lock, flags);
+ priv->irq0_stat |= status;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ schedule_work(&priv->bcmgenet_irq_work);
}
return IRQ_HANDLED;
@@ -2808,6 +2862,8 @@
err_fini_dma:
bcmgenet_fini_dma(priv);
err_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
@@ -3184,6 +3240,12 @@
*/
gphy_rev = reg & 0xffff;
+ /* This is reserved so should require special treatment */
+ if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+ pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+ return;
+ }
+
/* This is the good old scheme, just GPHY major, no minor nor patch */
if ((gphy_rev & 0xf0) != 0)
priv->gphy_rev = gphy_rev << 8;
@@ -3192,12 +3254,6 @@
else if ((gphy_rev & 0xff00) != 0)
priv->gphy_rev = gphy_rev;
- /* This is reserved so should require special treatment */
- else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
- pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
- return;
- }
-
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (!(params->flags & GENET_HAS_40BITS))
pr_warn("GENET does not support 40-bits PA\n");
@@ -3240,6 +3296,7 @@
const void *macaddr;
struct resource *r;
int err = -EIO;
+ const char *phy_mode_str;
/* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3283,6 +3340,8 @@
goto err;
}
+ spin_lock_init(&priv->lock);
+
SET_NETDEV_DEV(dev, &pdev->dev);
dev_set_drvdata(&pdev->dev, dev);
ether_addr_copy(dev->dev_addr, macaddr);
@@ -3345,6 +3404,13 @@
priv->clk_eee = NULL;
}
+ /* If this is an internal GPHY, power it on now, before UniMAC is
+ * brought out of reset as absolutely no UniMAC activity is allowed
+ */
+ if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+ !strcasecmp(phy_mode_str, "internal"))
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
err = reset_umac(priv);
if (err)
goto err_clk_disable;
@@ -3511,6 +3577,8 @@
return 0;
out_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 1e2dc34..db7f289 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@
#define MDIO_REG_SHIFT 16
#define MDIO_REG_MASK 0x1F
-#define UMAC_RBUF_OVFL_CNT 0x61C
+#define UMAC_RBUF_OVFL_CNT_V1 0x61C
+#define RBUF_OVFL_CNT_V2 0x80
+#define RBUF_OVFL_CNT_V3PLUS 0x94
#define UMAC_MPD_CTRL 0x620
#define MPD_EN (1 << 0)
@@ -224,7 +226,9 @@
#define UMAC_MPD_PW_MS 0x624
#define UMAC_MPD_PW_LS 0x628
-#define UMAC_RBUF_ERR_CNT 0x634
+#define UMAC_RBUF_ERR_CNT_V1 0x634
+#define RBUF_ERR_CNT_V2 0x84
+#define RBUF_ERR_CNT_V3PLUS 0x98
#define UMAC_MDF_ERR_CNT 0x638
#define UMAC_MDF_CTRL 0x650
#define UMAC_MDF_ADDR 0x654
@@ -619,11 +623,13 @@
struct work_struct bcmgenet_irq_work;
int irq0;
int irq1;
- unsigned int irq0_stat;
- unsigned int irq1_stat;
int wol_irq;
bool wol_irq_disabled;
+ /* shared status */
+ spinlock_t lock;
+ unsigned int irq0_stat;
+
/* HW descriptors/checksum variables */
bool desc_64b_en;
bool desc_rxchk_en;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index edae2dc..bb22d32 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14226,7 +14226,9 @@
/* Reset PHY, otherwise the read DMA engine will be in a mode that
* breaks all requests to 256 bytes.
*/
- if (tg3_asic_rev(tp) == ASIC_REV_57766)
+ if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+ tg3_asic_rev(tp) == ASIC_REV_5717 ||
+ tg3_asic_rev(tp) == ASIC_REV_5719)
reset_phy = true;
err = tg3_restart_hw(tp, reset_phy);
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index 9e59663..0f68118 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1930,13 +1930,13 @@
bfa_ioc_send_enable(struct bfa_ioc *ioc)
{
struct bfi_ioc_ctrl_req enable_req;
- struct timeval tv;
bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ,
bfa_ioc_portid(ioc));
enable_req.clscode = htons(ioc->clscode);
- do_gettimeofday(&tv);
- enable_req.tv_sec = ntohl(tv.tv_sec);
+ enable_req.rsvd = htons(0);
+ /* overflow in 2106 */
+ enable_req.tv_sec = ntohl(ktime_get_real_seconds());
bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req));
}
@@ -1947,6 +1947,10 @@
bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ,
bfa_ioc_portid(ioc));
+ disable_req.clscode = htons(ioc->clscode);
+ disable_req.rsvd = htons(0);
+ /* overflow in 2106 */
+ disable_req.tv_sec = ntohl(ktime_get_real_seconds());
bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req));
}
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 05c1c1d..cebfe3b 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -325,7 +325,7 @@
return PTR_ERR(kern_buf);
rc = sscanf(kern_buf, "%x:%x", &addr, &len);
- if (rc < 2) {
+ if (rc < 2 || len > UINT_MAX >> 2) {
netdev_warn(bnad->netdev, "failed to read user buffer\n");
kfree(kern_buf);
return -EINVAL;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
index 67befed..578c7f8 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -116,8 +116,7 @@
int speed = 2;
if (!xcv) {
- dev_err(&xcv->pdev->dev,
- "XCV init not done, probe may have failed\n");
+ pr_err("XCV init not done, probe may have failed\n");
return;
}
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
index 0f0de5b..d04a6c1 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -133,17 +133,15 @@
if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
fl6.flowi6_oif = sin6_scope_id;
dst = ip6_route_output(&init_net, NULL, &fl6);
- if (!dst)
- goto out;
- if (!cxgb_our_interface(lldi, get_real_dev,
- ip6_dst_idev(dst)->dev) &&
- !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+ if (dst->error ||
+ (!cxgb_our_interface(lldi, get_real_dev,
+ ip6_dst_idev(dst)->dev) &&
+ !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK))) {
dst_release(dst);
- dst = NULL;
+ return NULL;
}
}
-out:
return dst;
}
EXPORT_SYMBOL(cxgb_find_route6);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 5626908..b2eeecb 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -275,8 +275,7 @@
/* Check if mac has already been added as part of uc-list */
for (i = 0; i < adapter->uc_macs; i++) {
- if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
- mac)) {
+ if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
/* mac already added, skip addition */
adapter->pmac_id[0] = adapter->pmac_id[i + 1];
return 0;
@@ -363,8 +362,10 @@
status = -EPERM;
goto err;
}
-done:
+
+ /* Remember currently programmed MAC */
ether_addr_copy(adapter->dev_mac, addr->sa_data);
+done:
ether_addr_copy(netdev->dev_addr, addr->sa_data);
dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
return 0;
@@ -1679,14 +1680,12 @@
static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
{
- if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
- adapter->dev_mac)) {
+ if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
return 0;
}
- return be_cmd_pmac_add(adapter,
- (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+ return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
adapter->if_handle,
&adapter->pmac_id[uc_idx + 1], 0);
}
@@ -1722,9 +1721,8 @@
}
if (adapter->update_uc_list) {
- i = 1; /* First slot is claimed by the Primary MAC */
-
/* cache the uc-list in adapter array */
+ i = 0;
netdev_for_each_uc_addr(ha, netdev) {
ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
i++;
@@ -3639,8 +3637,10 @@
{
/* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
- check_privilege(adapter, BE_PRIV_FILTMGMT))
+ check_privilege(adapter, BE_PRIV_FILTMGMT)) {
be_dev_mac_del(adapter, adapter->pmac_id[0]);
+ eth_zero_addr(adapter->dev_mac);
+ }
be_clear_uc_list(adapter);
be_clear_mc_list(adapter);
@@ -3794,12 +3794,27 @@
if (status)
return status;
- /* Don't add MAC on BE3 VFs without FILTMGMT privilege */
- if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
- check_privilege(adapter, BE_PRIV_FILTMGMT)) {
+ /* Normally this condition usually true as the ->dev_mac is zeroed.
+ * But on BE3 VFs the initial MAC is pre-programmed by PF and
+ * subsequent be_dev_mac_add() can fail (after fresh boot)
+ */
+ if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
+ int old_pmac_id = -1;
+
+ /* Remember old programmed MAC if any - can happen on BE3 VF */
+ if (!is_zero_ether_addr(adapter->dev_mac))
+ old_pmac_id = adapter->pmac_id[0];
+
status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
if (status)
return status;
+
+ /* Delete the old programmed MAC as we successfully programmed
+ * a new MAC
+ */
+ if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
+ be_dev_mac_del(adapter, old_pmac_id);
+
ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
}
@@ -4573,6 +4588,10 @@
memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
+
+ /* Initial MAC for BE3 VFs is already programmed by PF */
+ if (BEx_chip(adapter) && be_virtfn(adapter))
+ memcpy(adapter->dev_mac, mac, ETH_ALEN);
}
return 0;
@@ -4714,6 +4733,15 @@
be_schedule_worker(adapter);
+ /*
+ * The IF was destroyed and re-created. We need to clear
+ * all promiscuous flags valid for the destroyed IF.
+ * Without this promisc mode is not restored during
+ * be_open() because the driver thinks that it is
+ * already enabled in HW.
+ */
+ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+
if (netif_running(netdev))
status = be_open(netdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 12aef1b..9170918 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -172,10 +172,12 @@
#endif /* CONFIG_M5272 */
/* The FEC stores dest/src/type/vlan, data, and checksum for receive packets.
+ *
+ * 2048 byte skbufs are allocated. However, alignment requirements
+ * varies between FEC variants. Worst case is 64, so round down by 64.
*/
-#define PKT_MAXBUF_SIZE 1522
+#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64))
#define PKT_MINBUF_SIZE 64
-#define PKT_MAXBLR_SIZE 1536
/* FEC receive acceleration */
#define FEC_RACC_IPDIS (1 << 1)
@@ -813,6 +815,12 @@
for (i = 0; i < txq->bd.ring_size; i++) {
/* Initialize the BD for every fragment in the page. */
bdp->cbd_sc = cpu_to_fec16(0);
+ if (bdp->cbd_bufaddr &&
+ !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+ dma_unmap_single(&fep->pdev->dev,
+ fec32_to_cpu(bdp->cbd_bufaddr),
+ fec16_to_cpu(bdp->cbd_datlen),
+ DMA_TO_DEVICE);
if (txq->tx_skbuff[i]) {
dev_kfree_skb_any(txq->tx_skbuff[i]);
txq->tx_skbuff[i] = NULL;
@@ -847,7 +855,7 @@
for (i = 0; i < fep->num_rx_queues; i++) {
rxq = fep->rx_queue[i];
writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
- writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
+ writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
/* enable DMA1/2 */
if (i)
@@ -2923,6 +2931,7 @@
struct netdev_hw_addr *ha;
unsigned int i, bit, data, crc, tmp;
unsigned char hash;
+ unsigned int hash_high = 0, hash_low = 0;
if (ndev->flags & IFF_PROMISC) {
tmp = readl(fep->hwp + FEC_R_CNTRL);
@@ -2945,11 +2954,7 @@
return;
}
- /* Clear filter and add the addresses in hash register
- */
- writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
-
+ /* Add the addresses in hash register */
netdev_for_each_mc_addr(ha, ndev) {
/* calculate crc32 value of mac address */
crc = 0xffffffff;
@@ -2967,16 +2972,14 @@
*/
hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
- if (hash > 31) {
- tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- tmp |= 1 << (hash - 32);
- writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
- } else {
- tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW);
- tmp |= 1 << hash;
- writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
- }
+ if (hash > 31)
+ hash_high |= 1 << (hash - 32);
+ else
+ hash_low |= 1 << hash;
}
+
+ writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
+ writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
}
/* Set a MAC change in hardware. */
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b8778e7..7c6c146 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -404,7 +404,7 @@
send_map_query(adapter);
for (i = 0; i < rxadd_subcrqs; i++) {
init_rx_pool(adapter, &adapter->rx_pool[i],
- IBMVNIC_BUFFS_PER_POOL, i,
+ adapter->req_rx_add_entries_per_subcrq, i,
be64_to_cpu(size_array[i]), 1);
if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
dev_err(dev, "Couldn't alloc rx pool\n");
@@ -419,23 +419,23 @@
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
tx_pool->tx_buff =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
if (!tx_pool->tx_buff)
goto tx_pool_alloc_failed;
if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->max_tx_entries_per_subcrq *
+ adapter->req_tx_entries_per_subcrq *
adapter->req_mtu))
goto tx_ltb_alloc_failed;
tx_pool->free_map =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(int), GFP_KERNEL);
if (!tx_pool->free_map)
goto tx_fm_alloc_failed;
- for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+ for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
tx_pool->free_map[j] = j;
tx_pool->consumer_index = 0;
@@ -705,6 +705,7 @@
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_tx_buff *tx_buff = NULL;
+ struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0;
unsigned int tx_map_failed = 0;
@@ -724,6 +725,7 @@
int ret = 0;
tx_pool = &adapter->tx_pool[queue_num];
+ tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->
@@ -744,7 +746,7 @@
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
@@ -817,7 +819,7 @@
if (tx_pool->consumer_index == 0)
tx_pool->consumer_index =
- adapter->max_tx_entries_per_subcrq - 1;
+ adapter->req_tx_entries_per_subcrq - 1;
else
tx_pool->consumer_index--;
@@ -826,6 +828,14 @@
ret = NETDEV_TX_BUSY;
goto out;
}
+
+ atomic_inc(&tx_scrq->used);
+
+ if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+ netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netif_stop_subqueue(netdev, queue_num);
+ }
+
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
@@ -1220,6 +1230,7 @@
scrq->adapter = adapter;
scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
scrq->cur = 0;
+ atomic_set(&scrq->used, 0);
scrq->rx_skb_top = NULL;
spin_lock_init(&scrq->lock);
@@ -1368,14 +1379,28 @@
DMA_TO_DEVICE);
}
- if (txbuff->last_frag)
+ if (txbuff->last_frag) {
+ atomic_dec(&scrq->used);
+
+ if (atomic_read(&scrq->used) <=
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+ netif_subqueue_stopped(adapter->netdev,
+ txbuff->skb)) {
+ netif_wake_subqueue(adapter->netdev,
+ scrq->pool_index);
+ netdev_dbg(adapter->netdev,
+ "Started queue %d\n",
+ scrq->pool_index);
+ }
+
dev_kfree_skb_any(txbuff->skb);
+ }
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
producer_index] = index;
adapter->tx_pool[pool].producer_index =
(adapter->tx_pool[pool].producer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index dd775d9..892eda3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -863,6 +863,7 @@
spinlock_t lock;
struct sk_buff *rx_skb_top;
struct ibmvnic_adapter *adapter;
+ atomic_t used;
};
struct ibmvnic_long_term_buff {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index f3aaca7..8a48656 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1364,6 +1364,9 @@
* Checks to see of the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
+ *
+ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ * up).
**/
static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
{
@@ -1379,7 +1382,7 @@
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 0;
+ return 1;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
@@ -1611,10 +1614,12 @@
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val)
+ if (ret_val) {
e_dbg("Error configuring flow control\n");
+ return ret_val;
+ }
- return ret_val;
+ return 1;
}
static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 4d19e46..3693ae1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -508,8 +508,8 @@
int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
-int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
- int unused);
+int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+ int __always_unused min_rate, int max_rate);
int fm10k_ndo_get_vf_config(struct net_device *netdev,
int vf_idx, struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 5f4dac0..e72fd52 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -126,6 +126,9 @@
struct fm10k_mbx_info *mbx = &vf_info->mbx;
u16 glort = vf_info->glort;
+ /* process the SM mailbox first to drain outgoing messages */
+ hw->mbx.ops.process(hw, &hw->mbx);
+
/* verify port mapping is valid, if not reset port */
if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
hw->iov.ops.reset_lport(hw, vf_info);
@@ -482,7 +485,7 @@
}
int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
- int __always_unused unused, int rate)
+ int __always_unused min_rate, int max_rate)
{
struct fm10k_intfc *interface = netdev_priv(netdev);
struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -493,14 +496,15 @@
return -EINVAL;
/* rate limit cannot be less than 10Mbs or greater than link speed */
- if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
+ if (max_rate &&
+ (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
return -EINVAL;
/* store values */
- iov_data->vf_info[vf_idx].rate = rate;
+ iov_data->vf_info[vf_idx].rate = max_rate;
/* update hardware configuration */
- hw->iov.ops.configure_tc(hw, vf_idx, rate);
+ hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
return 0;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2caafeb..becffd1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4217,8 +4217,12 @@
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_enable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_enable(&q_vector->napi);
+ }
}
/**
@@ -4232,8 +4236,12 @@
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_disable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_disable(&q_vector->napi);
+ }
}
/**
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 1683960..3a61491 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3102,6 +3102,8 @@
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
GFP_ATOMIC);
+ if (!adapter->shadow_vfta)
+ return -ENOMEM;
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
@@ -3271,7 +3273,7 @@
int igb_close(struct net_device *netdev)
{
- if (netif_device_present(netdev))
+ if (netif_device_present(netdev) || netdev->dismantle)
return __igb_close(netdev, false);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 77d3039..ad33622 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3696,10 +3696,10 @@
fw_cmd.ver_build = build;
fw_cmd.ver_sub = sub;
fw_cmd.hdr.checksum = 0;
- fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
- (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
fw_cmd.pad = 0;
fw_cmd.pad2 = 0;
+ fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+ (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 60f0bf7..77a60aa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -617,6 +617,8 @@
/* convert offset from words to bytes */
buffer.address = cpu_to_be32((offset + current_word) * 2);
buffer.length = cpu_to_be16(words_to_read * 2);
+ buffer.pad2 = 0;
+ buffer.pad3 = 0;
status = ixgbe_host_interface_command(hw, &buffer,
sizeof(buffer),
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index a0d1b08..7aeb7fe 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -232,7 +232,8 @@
dev->regs + MVMDIO_ERR_INT_MASK);
} else if (dev->err_interrupt == -EPROBE_DEFER) {
- return -EPROBE_DEFER;
+ ret = -EPROBE_DEFER;
+ goto out_mdio;
}
mutex_init(&dev->lock);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 6ea10a9..fa46326 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1182,6 +1182,10 @@
val &= ~MVNETA_GMAC0_PORT_ENABLE;
mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
+ pp->link = 0;
+ pp->duplex = -1;
+ pp->speed = 0;
+
udelay(200);
}
@@ -1905,9 +1909,9 @@
if (!mvneta_rxq_desc_is_first_last(rx_status) ||
(rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+ mvneta_rx_error(pp, rx_desc);
err_drop_frame:
dev->stats.rx_errors++;
- mvneta_rx_error(pp, rx_desc);
/* leave the descriptor untouched */
continue;
}
@@ -2922,7 +2926,7 @@
{
int queue;
- for (queue = 0; queue < txq_number; queue++)
+ for (queue = 0; queue < rxq_number; queue++)
mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e36bebc..dae9dcf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2304,6 +2304,17 @@
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
/* PCI might be offline */
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT) {
+ mlx4_warn(dev,
+ "communication channel is offline\n");
+ return -EIO;
+ }
+
msleep(100);
wr_toggle = swab32(readl(&priv->mfunc.comm->
slave_write));
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index d4d97ca..f9897d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -251,13 +251,9 @@
{
u32 freq_khz = freq * 1000;
u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
- u64 tmp_rounded =
- roundup_pow_of_two(max_val_cycles) > max_val_cycles ?
- roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX;
- u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
- max_val_cycles : tmp_rounded;
+ u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
/* calculate max possible multiplier in order to fit in 64bit */
- u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
/* This comes from the reverse of clocksource_khz2mult */
return ilog2(div_u64(max_mul * freq_khz, 1000000));
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 727122d..5411ca4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1940,6 +1940,14 @@
(u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
if (!offline_bit)
return 0;
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT)
+ break;
+
/* There are cases as part of AER/Reset flow that PF needs
* around 100 msec to load. We therefore sleep for 100 msec
* to allow other tasks to make use of that CPU during this
@@ -3954,6 +3962,9 @@
struct devlink *devlink = priv_to_devlink(priv);
int active_vfs = 0;
+ if (mlx4_is_slave(dev))
+ persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
mutex_lock(&persist->interface_state_mutex);
persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
mutex_unlock(&persist->interface_state_mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index f7fabec..4c3f1cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -367,7 +367,7 @@
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
- case MLX5_CMD_OP_SET_RATE_LIMIT:
+ case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_ALLOC_PD:
case MLX5_CMD_OP_ALLOC_UAR:
@@ -502,7 +502,7 @@
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
- MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(ALLOC_PD);
MLX5_COMMAND_STR_CASE(DEALLOC_PD);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9d37229..38981db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3038,6 +3038,7 @@
struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int offset = 0;
struct udphdr *udph;
u16 proto;
u16 port = 0;
@@ -3047,7 +3048,7 @@
proto = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
- proto = ipv6_hdr(skb)->nexthdr;
+ proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
break;
default:
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 4de3c28..331a6ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1015,7 +1015,7 @@
u32 *match_criteria)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct list_head *prev = ft->node.children.prev;
+ struct list_head *prev = &ft->node.children;
unsigned int candidate_index = 0;
struct mlx5_flow_group *fg;
void *match_criteria_addr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b3309f2..981cd1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1283,6 +1283,7 @@
if (err)
goto clean_load;
+ pci_save_state(pdev);
return 0;
clean_load:
@@ -1331,9 +1332,8 @@
mlx5_enter_error_state(dev);
mlx5_unload_one(dev, priv, false);
- /* In case of kernel call save the pci state and drain the health wq */
+ /* In case of kernel call drain the health wq */
if (state) {
- pci_save_state(pdev);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
}
@@ -1385,6 +1385,7 @@
pci_set_master(pdev);
pci_restore_state(pdev);
+ pci_save_state(pdev);
if (wait_vital(pdev)) {
dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index d0a4005..9346f39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -303,8 +303,8 @@
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
- MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
- MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 104902a..2be9ec5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -60,16 +60,16 @@
return ret_entry;
}
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
u32 rate, u16 index)
{
- u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
- MLX5_SET(set_rate_limit_in, in, opcode,
- MLX5_CMD_OP_SET_RATE_LIMIT);
- MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
- MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+ MLX5_SET(set_pp_rate_limit_in, in, opcode,
+ MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -108,7 +108,7 @@
entry->refcount++;
} else {
/* new rate limit */
- err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+ err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
if (err) {
mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
rate, err);
@@ -144,7 +144,7 @@
entry->refcount--;
if (!entry->refcount) {
/* need to remove rate */
- mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+ mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
entry->rate = 0;
}
@@ -197,8 +197,8 @@
/* Clear all configured rates */
for (i = 0; i < table->max_size; i++)
if (table->rl_entry[i].rate)
- mlx5_set_rate_limit_cmd(dev, 0,
- table->rl_entry[i].index);
+ mlx5_set_pp_rate_limit_cmd(dev, 0,
+ table->rl_entry[i].index);
kfree(dev->priv.rl_table.rl_entry);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 07a9ba6..2f74953 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -71,9 +71,9 @@
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
struct mlx5e_vxlan *vxlan;
- spin_lock(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_lookup(&vxlan_db->tree, port);
- spin_unlock(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
return vxlan;
}
@@ -88,8 +88,12 @@
struct mlx5e_vxlan *vxlan;
int err;
- if (mlx5e_vxlan_lookup_port(priv, port))
+ mutex_lock(&priv->state_lock);
+ vxlan = mlx5e_vxlan_lookup_port(priv, port);
+ if (vxlan) {
+ atomic_inc(&vxlan->refcount);
goto free_work;
+ }
if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
goto free_work;
@@ -99,10 +103,11 @@
goto err_delete_port;
vxlan->udp_port = port;
+ atomic_set(&vxlan->refcount, 1);
- spin_lock_irq(&vxlan_db->lock);
+ spin_lock_bh(&vxlan_db->lock);
err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
- spin_unlock_irq(&vxlan_db->lock);
+ spin_unlock_bh(&vxlan_db->lock);
if (err)
goto err_free;
@@ -113,35 +118,39 @@
err_delete_port:
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
free_work:
+ mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
-{
- struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
- struct mlx5e_vxlan *vxlan;
-
- spin_lock_irq(&vxlan_db->lock);
- vxlan = radix_tree_delete(&vxlan_db->tree, port);
- spin_unlock_irq(&vxlan_db->lock);
-
- if (!vxlan)
- return;
-
- mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
- kfree(vxlan);
-}
-
static void mlx5e_vxlan_del_port(struct work_struct *work)
{
struct mlx5e_vxlan_work *vxlan_work =
container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
+ struct mlx5e_priv *priv = vxlan_work->priv;
+ struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
u16 port = vxlan_work->port;
+ struct mlx5e_vxlan *vxlan;
+ bool remove = false;
- __mlx5e_vxlan_core_del_port(priv, port);
+ mutex_lock(&priv->state_lock);
+ spin_lock_bh(&vxlan_db->lock);
+ vxlan = radix_tree_lookup(&vxlan_db->tree, port);
+ if (!vxlan)
+ goto out_unlock;
+ if (atomic_dec_and_test(&vxlan->refcount)) {
+ radix_tree_delete(&vxlan_db->tree, port);
+ remove = true;
+ }
+
+out_unlock:
+ spin_unlock_bh(&vxlan_db->lock);
+
+ if (remove) {
+ mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+ kfree(vxlan);
+ }
+ mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
@@ -171,12 +180,11 @@
struct mlx5e_vxlan *vxlan;
unsigned int port = 0;
- spin_lock_irq(&vxlan_db->lock);
+ /* Lockless since we are the only radix-tree consumers, wq is disabled */
while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
port = vxlan->udp_port;
- spin_unlock_irq(&vxlan_db->lock);
- __mlx5e_vxlan_core_del_port(priv, (u16)port);
- spin_lock_irq(&vxlan_db->lock);
+ radix_tree_delete(&vxlan_db->tree, port);
+ mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+ kfree(vxlan);
}
- spin_unlock_irq(&vxlan_db->lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
index 5def12c..5ef6ae7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
@@ -36,6 +36,7 @@
#include "en.h"
struct mlx5e_vxlan {
+ atomic_t refcount;
u16 udp_port;
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6460c72..a01e6c0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -788,7 +788,7 @@
#define MLXSW_REG_SPVM_ID 0x200F
#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
@@ -1757,7 +1757,7 @@
#define MLXSW_REG_SPVMLR_ID 0x2020
#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
MLXSW_REG_SPVMLR_REC_LEN * \
MLXSW_REG_SPVMLR_REC_MAX_COUNT)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 9e31a33..1655601 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -765,11 +765,8 @@
dipn = htonl(dip);
dev = mlxsw_sp->rifs[rif]->dev;
n = neigh_lookup(&arp_tbl, &dipn, dev);
- if (!n) {
- netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
- &dip);
+ if (!n)
return;
- }
netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
neigh_event_send(n, NULL);
@@ -1328,9 +1325,9 @@
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
bool removing)
{
- if (!removing && !nh->should_offload)
+ if (!removing)
nh->should_offload = 1;
- else if (removing && nh->offloaded)
+ else
nh->should_offload = 0;
nh->update = 1;
}
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 4367dd6..0622fd0 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -25,6 +25,7 @@
#include <linux/of_irq.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
+#include <linux/circ_buf.h>
#include "moxart_ether.h"
@@ -278,6 +279,13 @@
return rx;
}
+static int moxart_tx_queue_space(struct net_device *ndev)
+{
+ struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+
+ return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM);
+}
+
static void moxart_tx_finished(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
@@ -297,6 +305,9 @@
tx_tail = TX_NEXT(tx_tail);
}
priv->tx_tail = tx_tail;
+ if (netif_queue_stopped(ndev) &&
+ moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD)
+ netif_wake_queue(ndev);
}
static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
@@ -324,13 +335,18 @@
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
void *desc;
unsigned int len;
- unsigned int tx_head = priv->tx_head;
+ unsigned int tx_head;
u32 txdes1;
int ret = NETDEV_TX_BUSY;
+ spin_lock_irq(&priv->txlock);
+
+ tx_head = priv->tx_head;
desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
- spin_lock_irq(&priv->txlock);
+ if (moxart_tx_queue_space(ndev) == 1)
+ netif_stop_queue(ndev);
+
if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
net_dbg_ratelimited("no TX space for packet\n");
priv->stats.tx_dropped++;
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index 93a9563..afc32ec 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -59,6 +59,7 @@
#define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK))
#define TX_BUF_SIZE 1600
#define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1)
+#define TX_WAKE_THRESHOLD 16
#define RX_DESC_NUM 64
#define RX_DESC_NUM_MASK (RX_DESC_NUM-1)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 0c42c24..ed014bd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -373,8 +373,9 @@
u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+ u32 align = elems_per_page * DQ_RANGE_ALIGN;
- p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+ p_conn->cid_count = roundup(p_conn->cid_count, align);
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 62ae55b..a3360cb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -187,6 +187,8 @@
/* If need to reuse or there's no replacement buffer, repost this */
if (rc)
goto out_post;
+ dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+ cdev->ll2->rx_size, DMA_FROM_DEVICE);
skb = build_skb(buffer->data, 0);
if (!skb) {
@@ -441,7 +443,7 @@
static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn,
union core_rx_cqe_union *p_cqe,
- unsigned long lock_flags,
+ unsigned long *p_lock_flags,
bool b_last_cqe)
{
struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
@@ -462,10 +464,10 @@
"Mismatch between active_descq and the LL2 Rx chain\n");
list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
- spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+ spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
- spin_lock_irqsave(&p_rx->lock, lock_flags);
+ spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
return 0;
}
@@ -505,7 +507,8 @@
break;
case CORE_RX_CQE_TYPE_REGULAR:
rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
- cqe, flags, b_last_cqe);
+ cqe, &flags,
+ b_last_cqe);
break;
default:
rc = -EIO;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index bdbcd2b..c3c28f0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -3849,7 +3849,7 @@
struct list_head *head = &mbx->cmd_q;
struct qlcnic_cmd_args *cmd = NULL;
- spin_lock(&mbx->queue_lock);
+ spin_lock_bh(&mbx->queue_lock);
while (!list_empty(head)) {
cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
@@ -3860,7 +3860,7 @@
qlcnic_83xx_notify_cmd_completion(adapter, cmd);
}
- spin_unlock(&mbx->queue_lock);
+ spin_unlock_bh(&mbx->queue_lock);
}
static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter)
@@ -3896,12 +3896,12 @@
{
struct qlcnic_mailbox *mbx = adapter->ahw->mailbox;
- spin_lock(&mbx->queue_lock);
+ spin_lock_bh(&mbx->queue_lock);
list_del(&cmd->list);
mbx->num_cmds--;
- spin_unlock(&mbx->queue_lock);
+ spin_unlock_bh(&mbx->queue_lock);
qlcnic_83xx_notify_cmd_completion(adapter, cmd);
}
@@ -3966,7 +3966,7 @@
init_completion(&cmd->completion);
cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN;
- spin_lock(&mbx->queue_lock);
+ spin_lock_bh(&mbx->queue_lock);
list_add_tail(&cmd->list, &mbx->cmd_q);
mbx->num_cmds++;
@@ -3974,7 +3974,7 @@
*timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT;
queue_work(mbx->work_q, &mbx->work);
- spin_unlock(&mbx->queue_lock);
+ spin_unlock_bh(&mbx->queue_lock);
return 0;
}
@@ -4070,15 +4070,15 @@
mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
spin_unlock_irqrestore(&mbx->aen_lock, flags);
- spin_lock(&mbx->queue_lock);
+ spin_lock_bh(&mbx->queue_lock);
if (list_empty(head)) {
- spin_unlock(&mbx->queue_lock);
+ spin_unlock_bh(&mbx->queue_lock);
return;
}
cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
- spin_unlock(&mbx->queue_lock);
+ spin_unlock_bh(&mbx->queue_lock);
mbx_ops->encode_cmd(adapter, cmd);
mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2c4350a..18e68c9 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1387,7 +1387,7 @@
{
void __iomem *ioaddr = tp->mmio_addr;
- return RTL_R8(IBISR0) & 0x02;
+ return RTL_R8(IBISR0) & 0x20;
}
static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
@@ -1395,7 +1395,7 @@
void __iomem *ioaddr = tp->mmio_addr;
RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
- rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000);
+ rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
}
@@ -2222,19 +2222,14 @@
void __iomem *ioaddr = tp->mmio_addr;
dma_addr_t paddr = tp->counters_phys_addr;
u32 cmd;
- bool ret;
RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
+ RTL_R32(CounterAddrHigh);
cmd = (u64)paddr & DMA_BIT_MASK(32);
RTL_W32(CounterAddrLow, cmd);
RTL_W32(CounterAddrLow, cmd | counter_cmd);
- ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
-
- RTL_W32(CounterAddrLow, 0);
- RTL_W32(CounterAddrHigh, 0);
-
- return ret;
+ return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
}
static bool rtl8169_reset_counters(struct net_device *dev)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 11623aa..10d3a9f 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -941,14 +941,10 @@
/* Receive error message handling */
priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors) {
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
ndev->stats.rx_over_errors = priv->rx_over_errors;
- netif_err(priv, rx_err, ndev, "Receive Descriptor Empty\n");
- }
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors) {
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
- netif_err(priv, rx_err, ndev, "Receive FIFO Overflow\n");
- }
out:
return budget - quota;
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 2140ded..b6816ae 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3087,18 +3087,37 @@
/* ioremap the TSU registers */
if (mdp->cd->tsu) {
struct resource *rtsu;
+
rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
- if (IS_ERR(mdp->tsu_addr)) {
- ret = PTR_ERR(mdp->tsu_addr);
+ if (!rtsu) {
+ dev_err(&pdev->dev, "no TSU resource\n");
+ ret = -ENODEV;
+ goto out_release;
+ }
+ /* We can only request the TSU region for the first port
+ * of the two sharing this TSU for the probe to succeed...
+ */
+ if (devno % 2 == 0 &&
+ !devm_request_mem_region(&pdev->dev, rtsu->start,
+ resource_size(rtsu),
+ dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "can't request TSU resource.\n");
+ ret = -EBUSY;
+ goto out_release;
+ }
+ mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+ resource_size(rtsu));
+ if (!mdp->tsu_addr) {
+ dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+ ret = -ENOMEM;
goto out_release;
}
mdp->port = devno % 2;
ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
}
- /* initialize first or needed device */
- if (!devno || pd->needs_init) {
+ /* Need to init only the first port of the two sharing a TSU */
+ if (devno % 2 == 0) {
if (mdp->cd->chip_reset)
mdp->cd->chip_reset(ndev);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 1d85109..3d5d5d54 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4967,7 +4967,7 @@
* MCFW do not support VFs.
*/
rc = efx_ef10_vport_set_mac_address(efx);
- } else {
+ } else if (rc) {
efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
sizeof(inbuf), NULL, 0, rc);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ef6bff8..98bbb91 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -280,8 +280,14 @@
bool stmmac_eee_init(struct stmmac_priv *priv)
{
unsigned long flags;
+ int interface = priv->plat->interface;
bool ret = false;
+ if ((interface != PHY_INTERFACE_MODE_MII) &&
+ (interface != PHY_INTERFACE_MODE_GMII) &&
+ !phy_interface_mode_is_rgmii(interface))
+ goto out;
+
/* Using PCS we cannot dial with the phy registers at this stage
* so we do not support extra feature like EEE.
*/
@@ -1795,6 +1801,7 @@
priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
+ priv->mss = 0;
ret = alloc_dma_desc_resources(priv);
if (ret < 0) {
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index 6d68c8a..da4ec57 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -34,6 +34,7 @@
config XILINX_LL_TEMAC
tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver"
depends on (PPC || MICROBLAZE)
+ depends on !64BIT || BROKEN
select PHYLIB
---help---
This driver supports the Xilinx 10/100/1000 LocalLink TEMAC
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index e46b1eb..7ea8ead 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1277,7 +1277,7 @@
fjes_set_ethtool_ops(netdev);
netdev->mtu = fjes_support_mtu[3];
netdev->flags |= IFF_BROADCAST;
- netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
static void fjes_irq_watch_task(struct work_struct *work)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index cebde07..cb206e5 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -69,7 +69,6 @@
struct socket *sock0;
struct socket *sock1u;
- struct net *net;
struct net_device *dev;
unsigned int hash_size;
@@ -316,7 +315,7 @@
netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
- xnet = !net_eq(gtp->net, dev_net(gtp->dev));
+ xnet = !net_eq(sock_net(sk), dev_net(gtp->dev));
switch (udp_sk(sk)->encap_type) {
case UDP_ENCAP_GTP0:
@@ -612,7 +611,7 @@
pktinfo.fl4.saddr, pktinfo.fl4.daddr,
pktinfo.iph->tos,
ip4_dst_hoplimit(&pktinfo.rt->dst),
- htons(IP_DF),
+ 0,
pktinfo.gtph_port, pktinfo.gtph_port,
true, false);
break;
@@ -658,7 +657,7 @@
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
static void gtp_hashtable_free(struct gtp_dev *gtp);
static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp,
- int fd_gtp0, int fd_gtp1, struct net *src_net);
+ int fd_gtp0, int fd_gtp1);
static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
@@ -675,7 +674,7 @@
fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
- err = gtp_encap_enable(dev, gtp, fd0, fd1, src_net);
+ err = gtp_encap_enable(dev, gtp, fd0, fd1);
if (err < 0)
goto out_err;
@@ -821,7 +820,7 @@
}
static int gtp_encap_enable(struct net_device *dev, struct gtp_dev *gtp,
- int fd_gtp0, int fd_gtp1, struct net *src_net)
+ int fd_gtp0, int fd_gtp1)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct socket *sock0, *sock1u;
@@ -858,7 +857,6 @@
gtp->sock0 = sock0;
gtp->sock1u = sock1u;
- gtp->net = src_net;
tuncfg.sk_user_data = gtp;
tuncfg.encap_rcv = gtp_encap_recv;
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b4e9907..980e385 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -404,7 +404,7 @@
struct dst_entry *dst;
int err, ret = NET_XMIT_DROP;
struct flowi6 fl6 = {
- .flowi6_iif = dev->ifindex,
+ .flowi6_oif = dev->ifindex,
.daddr = ip6h->daddr,
.saddr = ip6h->saddr,
.flowi6_flags = FLOWI_FLAG_ANYSRC,
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index a0849f4..c0192f9 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -418,8 +418,9 @@
memset(rd, 0, sizeof(*rd));
rd->hw = hwmap + i;
rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA);
- if (rd->buf == NULL ||
- !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) {
+ if (rd->buf)
+ busaddr = pci_map_single(pdev, rd->buf, len, dir);
+ if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) {
if (rd->buf) {
net_err_ratelimited("%s: failed to create PCI-MAP for %p\n",
__func__, rd->buf);
@@ -430,8 +431,7 @@
rd = r->rd + j;
busaddr = rd_get_addr(rd);
rd_set_addr_status(rd, 0, 0);
- if (busaddr)
- pci_unmap_single(pdev, busaddr, len, dir);
+ pci_unmap_single(pdev, busaddr, len, dir);
kfree(rd->buf);
rd->buf = NULL;
}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index dc8ccac..6d55049 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -452,7 +452,7 @@
struct macvlan_dev, list);
else
vlan = macvlan_hash_lookup(port, eth->h_dest);
- if (vlan == NULL)
+ if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE)
return RX_HANDLER_PASS;
dev = vlan->dev;
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index a52b560..3603eec 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -166,7 +166,7 @@
mac = (const u8 *) ndev->dev_addr;
if (!is_valid_ether_addr(mac))
- return -EFAULT;
+ return -EINVAL;
for (i = 0; i < 3; i++) {
phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2229188..2032a6d 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -624,6 +624,7 @@
phydev->link = 0;
if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
phydev->drv->config_intr(phydev);
+ return genphy_config_aneg(phydev);
}
return 0;
@@ -1020,7 +1021,7 @@
.phy_id = PHY_ID_KSZ8795,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8795",
- .features = (SUPPORTED_Pause | SUPPORTED_Asym_Pause),
+ .features = PHY_BASIC_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 93ffedf..1e2d4f1 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -491,13 +491,14 @@
if (err)
return err;
- ks->regs_attr.size = ks->chip->regs_size;
memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+ ks->regs_attr.size = ks->chip->regs_size;
err = ks8995_reset(ks);
if (err)
return err;
+ sysfs_attr_init(&ks->regs_attr.attr);
err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
if (err) {
dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 440d5f4..fc4c2cc 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -958,6 +958,7 @@
unregister_netdevice_many(&list);
rtnl_unlock();
+ mutex_destroy(&pn->all_ppp_mutex);
idr_destroy(&pn->units_idr);
}
@@ -1001,17 +1002,18 @@
if (!ifname_is_set)
snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index);
+ mutex_unlock(&pn->all_ppp_mutex);
+
ret = register_netdevice(ppp->dev);
if (ret < 0)
goto err_unit;
atomic_inc(&ppp_unit_count);
- mutex_unlock(&pn->all_ppp_mutex);
-
return 0;
err_unit:
+ mutex_lock(&pn->all_ppp_mutex);
unit_put(&pn->units_idr, ppp->file.index);
err:
mutex_unlock(&pn->all_ppp_mutex);
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 4ddae81..dc36c2e 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -842,6 +842,7 @@
struct pppoe_hdr *ph;
struct net_device *dev;
char *start;
+ int hlen;
lock_sock(sk);
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
@@ -860,16 +861,16 @@
if (total_len > (dev->mtu + dev->hard_header_len))
goto end;
-
- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
- 0, GFP_KERNEL);
+ hlen = LL_RESERVED_SPACE(dev);
+ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
+ dev->needed_tailroom, 0, GFP_KERNEL);
if (!skb) {
error = -ENOMEM;
goto end;
}
/* Reserve space for headers. */
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
skb->dev = dev;
@@ -930,7 +931,7 @@
/* Copy the data if there is no space for the header or if it's
* read-only.
*/
- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
+ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
goto abort;
__skb_push(skb, sizeof(*ph));
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0539869..5f51668 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -525,6 +525,14 @@
skb_queue_purge(&tfile->sk.sk_error_queue);
}
+static void tun_cleanup_tx_array(struct tun_file *tfile)
+{
+ if (tfile->tx_array.ring.queue) {
+ skb_array_cleanup(&tfile->tx_array);
+ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
+ }
+}
+
static void __tun_detach(struct tun_file *tfile, bool clean)
{
struct tun_file *ntfile;
@@ -566,8 +574,7 @@
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- if (tun)
- skb_array_cleanup(&tfile->tx_array);
+ tun_cleanup_tx_array(tfile);
sock_put(&tfile->sk);
}
}
@@ -606,11 +613,13 @@
/* Drop read queue */
tun_queue_purge(tfile);
sock_put(&tfile->sk);
+ tun_cleanup_tx_array(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
sock_put(&tfile->sk);
+ tun_cleanup_tx_array(tfile);
}
BUG_ON(tun->numdisabled != 0);
@@ -2369,6 +2378,8 @@
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
+ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
+
return 0;
}
diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index e221bfc..947bea8 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -293,12 +293,9 @@
{
int len = skb->len;
- if (skb_headroom(skb) < 2) {
- struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
+ if (skb_cow_head(skb, 2)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
skb_push(skb, 2);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f33460c..c53385a 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2197,6 +2197,7 @@
buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
dev->rx_qlen = 4;
+ dev->tx_qlen = 4;
}
ret = lan78xx_write_reg(dev, BURST_CAP, buf);
@@ -2419,14 +2420,9 @@
{
u32 tx_cmd_a, tx_cmd_b;
- if (skb_headroom(skb) < TX_OVERHEAD) {
- struct sk_buff *skb2;
-
- skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
if (lan78xx_linearize(skb) < 0)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9cf11c8..e1e5e84 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -74,9 +74,11 @@
net->hard_header_len = 0;
net->addr_len = 0;
net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+ set_bit(EVENT_NO_IP_ALIGN, &dev->flags);
netdev_dbg(net, "mode: raw IP\n");
} else if (!net->header_ops) { /* don't bother if already set */
ether_setup(net);
+ clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
netdev_dbg(net, "mode: Ethernet\n");
}
@@ -580,6 +582,10 @@
USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69),
.driver_info = (unsigned long)&qmi_wwan_info,
},
+ { /* Motorola Mapphone devices with MDM6600 */
+ USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff),
+ .driver_info = (unsigned long)&qmi_wwan_info,
+ },
/* 2. Combined interface devices matching on class+protocol */
{ /* Huawei E367 and possibly others in "Windows mode" */
@@ -901,6 +907,7 @@
{QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
@@ -936,6 +943,8 @@
{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
+ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index afb953a..b2d7c7e 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
#define NETNEXT_VERSION "08"
/* Information for net */
-#define NET_VERSION "8"
+#define NET_VERSION "9"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -501,6 +501,8 @@
#define RTL8153_RMS RTL8153_MAX_PACKET
#define RTL8152_TX_TIMEOUT (5 * HZ)
#define RTL8152_NAPI_WEIGHT 64
+#define rx_reserved_size(x) ((x) + VLAN_ETH_HLEN + CRC_SIZE + \
+ sizeof(struct rx_desc) + RX_ALIGN)
/* rtl8152 flags */
enum rtl8152_flags {
@@ -1292,6 +1294,7 @@
}
} else {
if (netif_carrier_ok(tp->netdev)) {
+ netif_stop_queue(tp->netdev);
set_bit(RTL8152_LINK_CHG, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
@@ -1362,6 +1365,7 @@
spin_lock_init(&tp->rx_lock);
spin_lock_init(&tp->tx_lock);
INIT_LIST_HEAD(&tp->tx_free);
+ INIT_LIST_HEAD(&tp->rx_done);
skb_queue_head_init(&tp->tx_queue);
skb_queue_head_init(&tp->rx_queue);
@@ -2252,8 +2256,7 @@
static void r8153_set_rx_early_size(struct r8152 *tp)
{
- u32 mtu = tp->netdev->mtu;
- u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
+ u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4;
ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
}
@@ -3165,6 +3168,9 @@
napi_enable(&tp->napi);
netif_wake_queue(netdev);
netif_info(tp, link, netdev, "carrier on\n");
+ } else if (netif_queue_stopped(netdev) &&
+ skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+ netif_wake_queue(netdev);
}
} else {
if (netif_carrier_ok(netdev)) {
@@ -3698,8 +3704,18 @@
tp->rtl_ops.autosuspend_en(tp, false);
napi_disable(&tp->napi);
set_bit(WORK_ENABLE, &tp->flags);
- if (netif_carrier_ok(tp->netdev))
- rtl_start_rx(tp);
+
+ if (netif_carrier_ok(tp->netdev)) {
+ if (rtl8152_get_speed(tp) & LINK_STATUS) {
+ rtl_start_rx(tp);
+ } else {
+ netif_carrier_off(tp->netdev);
+ tp->rtl_ops.disable(tp);
+ netif_info(tp, link, tp->netdev,
+ "linking down\n");
+ }
+ }
+
napi_enable(&tp->napi);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
smp_mb__after_atomic();
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 9af9799..4cb9b11 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2205,13 +2205,9 @@
{
u32 tx_cmd_a, tx_cmd_b;
- if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
- struct sk_buff *skb2 =
- skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 4a1e9c4..aadfe1d 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -456,14 +456,9 @@
len = skb->len;
- if (skb_headroom(skb) < SR_TX_OVERHEAD) {
- struct sk_buff *skb2;
-
- skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
+ if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
dev_kfree_skb_any(skb);
- skb = skb2;
- if (!skb)
- return NULL;
+ return NULL;
}
__skb_push(skb, SR_TX_OVERHEAD);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index d5071e3..4ab82b9 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -485,7 +485,10 @@
return -ENOLINK;
}
- skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+ if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags))
+ skb = __netdev_alloc_skb(dev->net, size, flags);
+ else
+ skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
if (!skb) {
netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index ef83ae3..4afba17 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1616,7 +1616,6 @@
rq->rx_ring[i].basePA);
rq->rx_ring[i].base = NULL;
}
- rq->buf_info[i] = NULL;
}
if (rq->data_ring.base) {
@@ -1638,6 +1637,7 @@
(rq->rx_ring[0].size + rq->rx_ring[1].size);
dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
rq->buf_info_pa);
+ rq->buf_info[0] = rq->buf_info[1] = NULL;
}
}
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index e7f5910..f8eb66e 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -467,6 +467,9 @@
struct i2400mu *i2400mu;
struct usb_device *usb_dev = interface_to_usbdev(iface);
+ if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+ return -ENODEV;
+
if (usb_dev->speed != USB_SPEED_HIGH)
dev_err(dev, "device not connected as high speed\n");
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 0b4c156..ba1fe61 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -548,6 +548,11 @@
return IEEE80211_TKIP_IV_LEN;
case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
return IEEE80211_CCMP_HDR_LEN;
+ case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
+ return IEEE80211_CCMP_256_HDR_LEN;
+ case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
+ case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
+ return IEEE80211_GCMP_HDR_LEN;
case HTT_RX_MPDU_ENCRYPT_WEP128:
case HTT_RX_MPDU_ENCRYPT_WAPI:
break;
@@ -573,6 +578,11 @@
return IEEE80211_TKIP_ICV_LEN;
case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
return IEEE80211_CCMP_MIC_LEN;
+ case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
+ return IEEE80211_CCMP_256_MIC_LEN;
+ case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
+ case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
+ return IEEE80211_GCMP_MIC_LEN;
case HTT_RX_MPDU_ENCRYPT_WEP128:
case HTT_RX_MPDU_ENCRYPT_WAPI:
break;
@@ -1024,9 +1034,21 @@
hdr = (void *)msdu->data;
/* Tail */
- if (status->flag & RX_FLAG_IV_STRIPPED)
+ if (status->flag & RX_FLAG_IV_STRIPPED) {
skb_trim(msdu, msdu->len -
ath10k_htt_rx_crypto_tail_len(ar, enctype));
+ } else {
+ /* MIC */
+ if ((status->flag & RX_FLAG_MIC_STRIPPED) &&
+ enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
+ skb_trim(msdu, msdu->len - 8);
+
+ /* ICV */
+ if (status->flag & RX_FLAG_ICV_STRIPPED &&
+ enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
+ skb_trim(msdu, msdu->len -
+ ath10k_htt_rx_crypto_tail_len(ar, enctype));
+ }
/* MMIC */
if ((status->flag & RX_FLAG_MMIC_STRIPPED) &&
@@ -1048,7 +1070,8 @@
static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
struct sk_buff *msdu,
struct ieee80211_rx_status *status,
- const u8 first_hdr[64])
+ const u8 first_hdr[64],
+ enum htt_rx_mpdu_encrypt_type enctype)
{
struct ieee80211_hdr *hdr;
struct htt_rx_desc *rxd;
@@ -1056,6 +1079,7 @@
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
int l3_pad_bytes;
+ int bytes_aligned = ar->hw_params.decap_align_bytes;
/* Delivered decapped frame:
* [nwifi 802.11 header] <-- replaced with 802.11 hdr
@@ -1084,6 +1108,14 @@
/* push original 802.11 header */
hdr = (struct ieee80211_hdr *)first_hdr;
hdr_len = ieee80211_hdrlen(hdr->frame_control);
+
+ if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
+ memcpy(skb_push(msdu,
+ ath10k_htt_rx_crypto_param_len(ar, enctype)),
+ (void *)hdr + round_up(hdr_len, bytes_aligned),
+ ath10k_htt_rx_crypto_param_len(ar, enctype));
+ }
+
memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
/* original 802.11 header has a different DA and in
@@ -1144,6 +1176,7 @@
u8 sa[ETH_ALEN];
int l3_pad_bytes;
struct htt_rx_desc *rxd;
+ int bytes_aligned = ar->hw_params.decap_align_bytes;
/* Delivered decapped frame:
* [eth header] <-- replaced with 802.11 hdr & rfc1042/llc
@@ -1172,6 +1205,14 @@
/* push original 802.11 header */
hdr = (struct ieee80211_hdr *)first_hdr;
hdr_len = ieee80211_hdrlen(hdr->frame_control);
+
+ if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
+ memcpy(skb_push(msdu,
+ ath10k_htt_rx_crypto_param_len(ar, enctype)),
+ (void *)hdr + round_up(hdr_len, bytes_aligned),
+ ath10k_htt_rx_crypto_param_len(ar, enctype));
+ }
+
memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
/* original 802.11 header has a different DA and in
@@ -1185,12 +1226,14 @@
static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar,
struct sk_buff *msdu,
struct ieee80211_rx_status *status,
- const u8 first_hdr[64])
+ const u8 first_hdr[64],
+ enum htt_rx_mpdu_encrypt_type enctype)
{
struct ieee80211_hdr *hdr;
size_t hdr_len;
int l3_pad_bytes;
struct htt_rx_desc *rxd;
+ int bytes_aligned = ar->hw_params.decap_align_bytes;
/* Delivered decapped frame:
* [amsdu header] <-- replaced with 802.11 hdr
@@ -1206,6 +1249,14 @@
hdr = (struct ieee80211_hdr *)first_hdr;
hdr_len = ieee80211_hdrlen(hdr->frame_control);
+
+ if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
+ memcpy(skb_push(msdu,
+ ath10k_htt_rx_crypto_param_len(ar, enctype)),
+ (void *)hdr + round_up(hdr_len, bytes_aligned),
+ ath10k_htt_rx_crypto_param_len(ar, enctype));
+ }
+
memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
}
@@ -1240,13 +1291,15 @@
is_decrypted);
break;
case RX_MSDU_DECAP_NATIVE_WIFI:
- ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr);
+ ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr,
+ enctype);
break;
case RX_MSDU_DECAP_ETHERNET2_DIX:
ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype);
break;
case RX_MSDU_DECAP_8023_SNAP_LLC:
- ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr);
+ ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr,
+ enctype);
break;
}
}
@@ -1289,7 +1342,8 @@
static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
struct sk_buff_head *amsdu,
- struct ieee80211_rx_status *status)
+ struct ieee80211_rx_status *status,
+ bool fill_crypt_header)
{
struct sk_buff *first;
struct sk_buff *last;
@@ -1299,7 +1353,6 @@
enum htt_rx_mpdu_encrypt_type enctype;
u8 first_hdr[64];
u8 *qos;
- size_t hdr_len;
bool has_fcs_err;
bool has_crypto_err;
bool has_tkip_err;
@@ -1324,15 +1377,17 @@
* decapped header. It'll be used for undecapping of each MSDU.
*/
hdr = (void *)rxd->rx_hdr_status;
- hdr_len = ieee80211_hdrlen(hdr->frame_control);
- memcpy(first_hdr, hdr, hdr_len);
+ memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN);
/* Each A-MSDU subframe will use the original header as the base and be
* reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl.
*/
hdr = (void *)first_hdr;
- qos = ieee80211_get_qos_ctl(hdr);
- qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+
+ if (ieee80211_is_data_qos(hdr->frame_control)) {
+ qos = ieee80211_get_qos_ctl(hdr);
+ qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+ }
/* Some attention flags are valid only in the last MSDU. */
last = skb_peek_tail(amsdu);
@@ -1379,9 +1434,14 @@
status->flag |= RX_FLAG_DECRYPTED;
if (likely(!is_mgmt))
- status->flag |= RX_FLAG_IV_STRIPPED |
- RX_FLAG_MMIC_STRIPPED;
-}
+ status->flag |= RX_FLAG_MMIC_STRIPPED;
+
+ if (fill_crypt_header)
+ status->flag |= RX_FLAG_MIC_STRIPPED |
+ RX_FLAG_ICV_STRIPPED;
+ else
+ status->flag |= RX_FLAG_IV_STRIPPED;
+ }
skb_queue_walk(amsdu, msdu) {
ath10k_htt_rx_h_csum_offload(msdu);
@@ -1397,6 +1457,9 @@
if (is_mgmt)
continue;
+ if (fill_crypt_header)
+ continue;
+
hdr = (void *)msdu->data;
hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
}
@@ -1407,6 +1470,9 @@
struct ieee80211_rx_status *status)
{
struct sk_buff *msdu;
+ struct sk_buff *first_subframe;
+
+ first_subframe = skb_peek(amsdu);
while ((msdu = __skb_dequeue(amsdu))) {
/* Setup per-MSDU flags */
@@ -1415,6 +1481,13 @@
else
status->flag |= RX_FLAG_AMSDU_MORE;
+ if (msdu == first_subframe) {
+ first_subframe = NULL;
+ status->flag &= ~RX_FLAG_ALLOW_SAME_PN;
+ } else {
+ status->flag |= RX_FLAG_ALLOW_SAME_PN;
+ }
+
ath10k_process_rx(ar, status, msdu);
}
}
@@ -1557,7 +1630,7 @@
ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
- ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status);
+ ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true);
ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status);
return num_msdus;
@@ -1892,7 +1965,7 @@
num_msdus += skb_queue_len(&amsdu);
ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
ath10k_htt_rx_h_filter(ar, &amsdu, status);
- ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
+ ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false);
ath10k_htt_rx_h_deliver(ar, &amsdu, status);
break;
case -EAGAIN:
diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h
index 034e7a5..e4878d0 100644
--- a/drivers/net/wireless/ath/ath10k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath10k/rx_desc.h
@@ -239,6 +239,9 @@
HTT_RX_MPDU_ENCRYPT_WAPI = 5,
HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6,
HTT_RX_MPDU_ENCRYPT_NONE = 7,
+ HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8,
+ HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9,
+ HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10,
};
#define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff
diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c
index 1fa7f84..8e9480c 100644
--- a/drivers/net/wireless/ath/ath9k/tx99.c
+++ b/drivers/net/wireless/ath/ath9k/tx99.c
@@ -179,6 +179,9 @@
ssize_t len;
int r;
+ if (count < 1)
+ return -EINVAL;
+
if (sc->cur_chan->nvifs > 1)
return -EOPNOTSUPP;
@@ -186,6 +189,8 @@
if (copy_from_user(buf, user_buf, len))
return -EFAULT;
+ buf[len] = '\0';
+
if (strtobool(buf, &start))
return -EINVAL;
diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c
index 6e5d909..a635fc6 100644
--- a/drivers/net/wireless/broadcom/b43/main.c
+++ b/drivers/net/wireless/broadcom/b43/main.c
@@ -71,8 +71,18 @@
MODULE_FIRMWARE("b43/ucode13.fw");
MODULE_FIRMWARE("b43/ucode14.fw");
MODULE_FIRMWARE("b43/ucode15.fw");
+MODULE_FIRMWARE("b43/ucode16_lp.fw");
MODULE_FIRMWARE("b43/ucode16_mimo.fw");
+MODULE_FIRMWARE("b43/ucode24_lcn.fw");
+MODULE_FIRMWARE("b43/ucode25_lcn.fw");
+MODULE_FIRMWARE("b43/ucode25_mimo.fw");
+MODULE_FIRMWARE("b43/ucode26_mimo.fw");
+MODULE_FIRMWARE("b43/ucode29_mimo.fw");
+MODULE_FIRMWARE("b43/ucode33_lcn40.fw");
+MODULE_FIRMWARE("b43/ucode30_mimo.fw");
MODULE_FIRMWARE("b43/ucode5.fw");
+MODULE_FIRMWARE("b43/ucode40.fw");
+MODULE_FIRMWARE("b43/ucode42.fw");
MODULE_FIRMWARE("b43/ucode9.fw");
static int modparam_bad_frames_preempt;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 8e3c6f4..edffe5a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4080,8 +4080,8 @@
sdio_release_host(sdiodev->func[1]);
fail:
brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
- device_release_driver(dev);
device_release_driver(&sdiodev->func[2]->dev);
+ device_release_driver(dev);
}
struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 9789f3c..f1231c0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2320,7 +2320,7 @@
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
- /* Called when we need to transmit (a) frame(s) from agg queue */
+ /* Called when we need to transmit (a) frame(s) from agg or dqa queue */
iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames,
tids, more_data, true);
@@ -2340,7 +2340,8 @@
for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) {
struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid];
- if (tid_data->state != IWL_AGG_ON &&
+ if (!iwl_mvm_is_dqa_supported(mvm) &&
+ tid_data->state != IWL_AGG_ON &&
tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA)
continue;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index e64aeb4..bdd1dee 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -3032,7 +3032,7 @@
struct ieee80211_sta *sta,
enum ieee80211_frame_release_type reason,
u16 cnt, u16 tids, bool more_data,
- bool agg)
+ bool single_sta_queue)
{
struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
struct iwl_mvm_add_sta_cmd cmd = {
@@ -3052,14 +3052,14 @@
for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT)
cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]);
- /* If we're releasing frames from aggregation queues then check if the
- * all queues combined that we're releasing frames from have
+ /* If we're releasing frames from aggregation or dqa queues then check
+ * if all the queues that we're releasing frames from, combined, have:
* - more frames than the service period, in which case more_data
* needs to be set
* - fewer than 'cnt' frames, in which case we need to adjust the
* firmware command (but do that unconditionally)
*/
- if (agg) {
+ if (single_sta_queue) {
int remaining = cnt;
int sleep_tx_count;
@@ -3069,7 +3069,8 @@
u16 n_queued;
tid_data = &mvmsta->tid_data[tid];
- if (WARN(tid_data->state != IWL_AGG_ON &&
+ if (WARN(!iwl_mvm_is_dqa_supported(mvm) &&
+ tid_data->state != IWL_AGG_ON &&
tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA,
"TID %d state is %d\n",
tid, tid_data->state)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index e068d53..f65950e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -545,7 +545,7 @@
struct ieee80211_sta *sta,
enum ieee80211_frame_release_type reason,
u16 cnt, u16 tids, bool more_data,
- bool agg);
+ bool single_sta_queue);
int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
bool drain);
void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 092ae00..7465d4d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -7,7 +7,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -34,6 +34,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -621,8 +622,10 @@
* values.
* Note that we don't need to make sure it isn't agg'd, since we're
* TXing non-sta
+ * For DQA mode - we shouldn't increase it though
*/
- atomic_inc(&mvm->pending_frames[sta_id]);
+ if (!iwl_mvm_is_dqa_supported(mvm))
+ atomic_inc(&mvm->pending_frames[sta_id]);
return 0;
}
@@ -1009,11 +1012,8 @@
spin_unlock(&mvmsta->lock);
- /* Increase pending frames count if this isn't AMPDU */
- if ((iwl_mvm_is_dqa_supported(mvm) &&
- mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON &&
- mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) ||
- (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu))
+ /* Increase pending frames count if this isn't AMPDU or DQA queue */
+ if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)
atomic_inc(&mvm->pending_frames[mvmsta->sta_id]);
return 0;
@@ -1083,12 +1083,13 @@
lockdep_assert_held(&mvmsta->lock);
if ((tid_data->state == IWL_AGG_ON ||
- tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) &&
+ tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA ||
+ iwl_mvm_is_dqa_supported(mvm)) &&
iwl_mvm_tid_queued(tid_data) == 0) {
/*
- * Now that this aggregation queue is empty tell mac80211 so it
- * knows we no longer have frames buffered for the station on
- * this TID (for the TIM bitmap calculation.)
+ * Now that this aggregation or DQA queue is empty tell
+ * mac80211 so it knows we no longer have frames buffered for
+ * the station on this TID (for the TIM bitmap calculation.)
*/
ieee80211_sta_set_buffered(sta, tid, false);
}
@@ -1261,7 +1262,6 @@
u8 skb_freed = 0;
u16 next_reclaimed, seq_ctl;
bool is_ndp = false;
- bool txq_agg = false; /* Is this TXQ aggregated */
__skb_queue_head_init(&skbs);
@@ -1287,6 +1287,10 @@
info->flags |= IEEE80211_TX_STAT_ACK;
break;
case TX_STATUS_FAIL_DEST_PS:
+ /* In DQA, the FW should have stopped the queue and not
+ * return this status
+ */
+ WARN_ON(iwl_mvm_is_dqa_supported(mvm));
info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
break;
default:
@@ -1391,15 +1395,6 @@
bool send_eosp_ndp = false;
spin_lock_bh(&mvmsta->lock);
- if (iwl_mvm_is_dqa_supported(mvm)) {
- enum iwl_mvm_agg_state state;
-
- state = mvmsta->tid_data[tid].state;
- txq_agg = (state == IWL_AGG_ON ||
- state == IWL_EMPTYING_HW_QUEUE_DELBA);
- } else {
- txq_agg = txq_id >= mvm->first_agg_queue;
- }
if (!is_ndp) {
tid_data->next_reclaimed = next_reclaimed;
@@ -1456,11 +1451,11 @@
* If the txq is not an AMPDU queue, there is no chance we freed
* several skbs. Check that out...
*/
- if (txq_agg)
+ if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue)
goto out;
/* We can't free more than one frame at once on a shared queue */
- WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1));
+ WARN_ON(skb_freed > 1);
/* If we have still frames for this STA nothing to do here */
if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id]))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index d04babd..ff5ce1e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1040,6 +1040,8 @@
return le32_to_cpu(txq_timer->p2p_go);
case NL80211_IFTYPE_P2P_DEVICE:
return le32_to_cpu(txq_timer->p2p_device);
+ case NL80211_IFTYPE_MONITOR:
+ return default_timeout;
default:
WARN_ON(1);
return mvm->cfg->base_params->wd_timeout;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index d2a28a9..4b462dc 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3047,6 +3047,7 @@
{
struct hwsim_new_radio_params param = { 0 };
const char *hwname = NULL;
+ int ret;
param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG];
param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE];
@@ -3086,7 +3087,9 @@
param.regd = hwsim_world_regdom_custom[idx];
}
- return mac80211_hwsim_new_radio(info, ¶m);
+ ret = mac80211_hwsim_new_radio(info, ¶m);
+ kfree(hwname);
+ return ret;
}
static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index cb7365b..5b1d2e8 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -113,10 +113,10 @@
* A subset of struct net_device_stats that contains only the
* fields that are updated in netback.c for each queue.
*/
- unsigned int rx_bytes;
- unsigned int rx_packets;
- unsigned int tx_bytes;
- unsigned int tx_packets;
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
/* Additional stats used by xenvif */
unsigned long rx_gso_checksum_fixup;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 5bfaf55..618013e7 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -225,10 +225,10 @@
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
- unsigned long rx_bytes = 0;
- unsigned long rx_packets = 0;
- unsigned long tx_bytes = 0;
- unsigned long tx_packets = 0;
+ u64 rx_bytes = 0;
+ u64 rx_packets = 0;
+ u64 tx_bytes = 0;
+ u64 tx_packets = 0;
unsigned int index;
spin_lock(&vif->lock);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cd442e4..1a9dadf 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -86,6 +86,8 @@
/* IRQ name is queue name with "-tx" or "-rx" appended */
#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
+static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+
struct netfront_stats {
u64 packets;
u64 bytes;
@@ -1854,27 +1856,19 @@
xennet_destroy_queues(info);
err = xennet_create_queues(info, &num_queues);
- if (err < 0)
- goto destroy_ring;
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "creating queues");
+ kfree(info->queues);
+ info->queues = NULL;
+ goto out;
+ }
/* Create shared ring, alloc event channel -- for each queue */
for (i = 0; i < num_queues; ++i) {
queue = &info->queues[i];
err = setup_netfront(dev, queue, feature_split_evtchn);
- if (err) {
- /* setup_netfront() will tidy up the current
- * queue on error, but we need to clean up
- * those already allocated.
- */
- if (i > 0) {
- rtnl_lock();
- netif_set_real_num_tx_queues(info->netdev, i);
- rtnl_unlock();
- goto destroy_ring;
- } else {
- goto out;
- }
- }
+ if (err)
+ goto destroy_ring;
}
again:
@@ -1964,9 +1958,9 @@
xenbus_transaction_end(xbt, 1);
destroy_ring:
xennet_disconnect_backend(info);
- kfree(info->queues);
- info->queues = NULL;
+ xennet_destroy_queues(info);
out:
+ device_unregister(&dev->dev);
return err;
}
@@ -2059,10 +2053,12 @@
break;
case XenbusStateClosed:
+ wake_up_all(&module_unload_q);
if (dev->state == XenbusStateClosed)
break;
/* Missed the backend's CLOSING state -- fallthrough */
case XenbusStateClosing:
+ wake_up_all(&module_unload_q);
xenbus_frontend_closed(dev);
break;
}
@@ -2168,6 +2164,20 @@
dev_dbg(&dev->dev, "%s\n", dev->nodename);
+ if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
+ xenbus_switch_state(dev, XenbusStateClosing);
+ wait_event(module_unload_q,
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateClosing);
+
+ xenbus_switch_state(dev, XenbusStateClosed);
+ wait_event(module_unload_q,
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateClosed ||
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateUnknown);
+ }
+
xennet_disconnect_backend(info);
unregister_netdev(info->netdev);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 94733f7..7121453 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -183,13 +183,13 @@
return ret;
}
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
- struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+ struct log_group *log)
{
- WARN_ON(!ent);
+ WARN_ON(!log);
return arena_read_bytes(arena,
- arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
- 2 * LOG_ENT_SIZE);
+ arena->logoff + (lane * LOG_GRP_SIZE), log,
+ LOG_GRP_SIZE);
}
static struct dentry *debugfs_root;
@@ -229,6 +229,8 @@
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+ debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+ debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
}
static void btt_debugfs_init(struct btt *btt)
@@ -247,6 +249,11 @@
}
}
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+ return le32_to_cpu(log->ent[log_idx].seq);
+}
+
/*
* This function accepts two log entries, and uses the
* sequence number to find the 'older' entry.
@@ -256,8 +263,10 @@
*
* TODO The logic feels a bit kludge-y. make it better..
*/
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
{
+ int idx0 = a->log_index[0];
+ int idx1 = a->log_index[1];
int old;
/*
@@ -265,23 +274,23 @@
* the next time, the following logic works out to put this
* (next) entry into [1]
*/
- if (ent[0].seq == 0) {
- ent[0].seq = cpu_to_le32(1);
+ if (log_seq(log, idx0) == 0) {
+ log->ent[idx0].seq = cpu_to_le32(1);
return 0;
}
- if (ent[0].seq == ent[1].seq)
+ if (log_seq(log, idx0) == log_seq(log, idx1))
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+ if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
return -EINVAL;
- if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
- if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+ if (log_seq(log, idx0) < log_seq(log, idx1)) {
+ if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
old = 0;
else
old = 1;
} else {
- if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+ if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
old = 1;
else
old = 0;
@@ -306,17 +315,18 @@
{
int ret;
int old_ent, ret_ent;
- struct log_entry log[2];
+ struct log_group log;
- ret = btt_log_read_pair(arena, lane, log);
+ ret = btt_log_group_read(arena, lane, &log);
if (ret)
return -EIO;
- old_ent = btt_log_get_old(log);
+ old_ent = btt_log_get_old(arena, &log);
if (old_ent < 0 || old_ent > 1) {
dev_info(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
- old_ent, lane, log[0].seq, log[1].seq);
+ old_ent, lane, log.ent[arena->log_index[0]].seq,
+ log.ent[arena->log_index[1]].seq);
/* TODO set error state? */
return -EIO;
}
@@ -324,7 +334,7 @@
ret_ent = (old_flag ? old_ent : (1 - old_ent));
if (ent != NULL)
- memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+ memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
return ret_ent;
}
@@ -338,17 +348,13 @@
u32 sub, struct log_entry *ent)
{
int ret;
- /*
- * Ignore the padding in log_entry for calculating log_half.
- * The entry is 'committed' when we write the sequence number,
- * and we want to ensure that that is the last thing written.
- * We don't bother writing the padding as that would be extra
- * media wear and write amplification
- */
- unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
- u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+ u32 group_slot = arena->log_index[sub];
+ unsigned int log_half = LOG_ENT_SIZE / 2;
void *src = ent;
+ u64 ns_off;
+ ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+ (group_slot * LOG_ENT_SIZE);
/* split the 16B write into atomic, durable halves */
ret = arena_write_bytes(arena, ns_off, src, log_half);
if (ret)
@@ -419,16 +425,16 @@
{
int ret;
u32 i;
- struct log_entry log, zerolog;
+ struct log_entry ent, zerolog;
memset(&zerolog, 0, sizeof(zerolog));
for (i = 0; i < arena->nfree; i++) {
- log.lba = cpu_to_le32(i);
- log.old_map = cpu_to_le32(arena->external_nlba + i);
- log.new_map = cpu_to_le32(arena->external_nlba + i);
- log.seq = cpu_to_le32(LOG_SEQ_INIT);
- ret = __btt_log_write(arena, i, 0, &log);
+ ent.lba = cpu_to_le32(i);
+ ent.old_map = cpu_to_le32(arena->external_nlba + i);
+ ent.new_map = cpu_to_le32(arena->external_nlba + i);
+ ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+ ret = __btt_log_write(arena, i, 0, &ent);
if (ret)
return ret;
ret = __btt_log_write(arena, i, 1, &zerolog);
@@ -490,6 +496,123 @@
return 0;
}
+static bool ent_is_padding(struct log_entry *ent)
+{
+ return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+ && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+ bool idx_set = false, initial_state = true;
+ int ret, log_index[2] = {-1, -1};
+ u32 i, j, next_idx = 0;
+ struct log_group log;
+ u32 pad_count = 0;
+
+ for (i = 0; i < arena->nfree; i++) {
+ ret = btt_log_group_read(arena, i, &log);
+ if (ret < 0)
+ return ret;
+
+ for (j = 0; j < 4; j++) {
+ if (!idx_set) {
+ if (ent_is_padding(&log.ent[j])) {
+ pad_count++;
+ continue;
+ } else {
+ /* Skip if index has been recorded */
+ if ((next_idx == 1) &&
+ (j == log_index[0]))
+ continue;
+ /* valid entry, record index */
+ log_index[next_idx] = j;
+ next_idx++;
+ }
+ if (next_idx == 2) {
+ /* two valid entries found */
+ idx_set = true;
+ } else if (next_idx > 2) {
+ /* too many valid indices */
+ return -ENXIO;
+ }
+ } else {
+ /*
+ * once the indices have been set, just verify
+ * that all subsequent log groups are either in
+ * their initial state or follow the same
+ * indices.
+ */
+ if (j == log_index[0]) {
+ /* entry must be 'valid' */
+ if (ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ } else if (j == log_index[1]) {
+ ;
+ /*
+ * log_index[1] can be padding if the
+ * lane never got used and it is still
+ * in the initial state (three 'padding'
+ * entries)
+ */
+ } else {
+ /* entry must be invalid (padding) */
+ if (!ent_is_padding(&log.ent[j]))
+ return -ENXIO;
+ }
+ }
+ }
+ /*
+ * If any of the log_groups have more than one valid,
+ * non-padding entry, then the we are no longer in the
+ * initial_state
+ */
+ if (pad_count < 3)
+ initial_state = false;
+ pad_count = 0;
+ }
+
+ if (!initial_state && !idx_set)
+ return -ENXIO;
+
+ /*
+ * If all the entries in the log were in the initial state,
+ * assume new padding scheme
+ */
+ if (initial_state)
+ log_index[1] = 1;
+
+ /*
+ * Only allow the known permutations of log/padding indices,
+ * i.e. (0, 1), and (0, 2)
+ */
+ if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+ ; /* known index possibilities */
+ else {
+ dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+ return -ENXIO;
+ }
+
+ arena->log_index[0] = log_index[0];
+ arena->log_index[1] = log_index[1];
+ dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+ dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+ return 0;
+}
+
static int btt_rtt_init(struct arena_info *arena)
{
arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -545,8 +668,7 @@
available -= 2 * BTT_PG_SIZE;
/* The log takes a fixed amount of space based on nfree */
- logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
- BTT_PG_SIZE);
+ logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
available -= logsize;
/* Calculate optimal split between map and data area */
@@ -563,6 +685,10 @@
arena->mapoff = arena->dataoff + datasize;
arena->logoff = arena->mapoff + mapsize;
arena->info2off = arena->logoff + logsize;
+
+ /* Default log indices are (0,1) */
+ arena->log_index[0] = 0;
+ arena->log_index[1] = 1;
return arena;
}
@@ -653,6 +779,13 @@
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
+ ret = log_set_indices(arena);
+ if (ret) {
+ dev_err(to_dev(arena),
+ "Unable to deduce log/padding indices\n");
+ goto out;
+ }
+
ret = btt_freelist_init(arena);
if (ret)
goto out;
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index b2f8651..0f80b6b 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -26,6 +26,7 @@
#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
#define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)
#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */
@@ -44,12 +45,52 @@
INIT_READY
};
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), where as the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | pad |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * | ent[0] | ent[1] |
+ * | 16B | 16B |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * | ent[2] | ent[3] |
+ * | 16B | 16B |
+ * | pad | pad |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
struct log_entry {
__le32 lba;
__le32 old_map;
__le32 new_map;
__le32 seq;
- __le64 padding[2];
+};
+
+struct log_group {
+ struct log_entry ent[4];
};
struct btt_sb {
@@ -117,6 +158,7 @@
* @list: List head for list of arenas
* @debugfs_dir: Debugfs dentry
* @flags: Arena flags - may signify error states.
+ * @log_index: Indices of the valid log entries in a log_group
*
* arena_info is a per-arena handle. Once an arena is narrowed down for an
* IO, this struct is passed around for the duration of the IO.
@@ -147,6 +189,7 @@
struct dentry *debugfs_dir;
/* Arena flags */
u32 flags;
+ int log_index[2];
};
/**
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 71eb6c6..42abdd2 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -352,9 +352,9 @@
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
- unsigned long align;
enum nd_pfn_mode mode;
struct nd_namespace_io *nsio;
+ unsigned long align, start_pad;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
struct nd_namespace_common *ndns = nd_pfn->ndns;
const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -398,6 +398,7 @@
align = le32_to_cpu(pfn_sb->align);
offset = le64_to_cpu(pfn_sb->dataoff);
+ start_pad = le32_to_cpu(pfn_sb->start_pad);
if (align == 0)
align = 1UL << ilog2(offset);
mode = le32_to_cpu(pfn_sb->mode);
@@ -456,7 +457,7 @@
return -EBUSY;
}
- if ((align && !IS_ALIGNED(offset, align))
+ if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
|| !IS_ALIGNED(offset, PAGE_SIZE)) {
dev_err(&nd_pfn->dev,
"bad offset: %#llx dax disabled align: %#lx\n",
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fbeca06..719ee5f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1619,7 +1619,8 @@
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->ns_id == nsid) {
- kref_get(&ns->kref);
+ if (!kref_get_unless_zero(&ns->kref))
+ continue;
ret = ns;
break;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8edafd8..5c52a61 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -84,7 +84,7 @@
* NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
* found empirically.
*/
-#define NVME_QUIRK_DELAY_AMOUNT 2000
+#define NVME_QUIRK_DELAY_AMOUNT 2300
enum nvme_ctrl_state {
NVME_CTRL_NEW,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 54ea90f..e48ecb9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2109,6 +2109,8 @@
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 55ce769..c89d68a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -422,6 +422,13 @@
ctrl->sqs[qid] = sq;
}
+static void nvmet_confirm_sq(struct percpu_ref *ref)
+{
+ struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
+
+ complete(&sq->confirm_done);
+}
+
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
/*
@@ -430,7 +437,8 @@
*/
if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
nvmet_async_events_free(sq->ctrl);
- percpu_ref_kill(&sq->ref);
+ percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
+ wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
percpu_ref_exit(&sq->ref);
@@ -458,6 +466,7 @@
return ret;
}
init_completion(&sq->free_done);
+ init_completion(&sq->confirm_done);
return 0;
}
@@ -816,6 +825,9 @@
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
+ flush_work(&ctrl->async_event_work);
+ cancel_work_sync(&ctrl->fatal_err_work);
+
ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
nvmet_subsys_put(subsys);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index d5df77d..e56ca3f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -223,8 +223,6 @@
static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
struct nvme_loop_iod *iod, unsigned int queue_idx)
{
- BUG_ON(queue_idx >= ctrl->queue_count);
-
iod->req.cmd = &iod->cmd;
iod->req.rsp = &iod->rsp;
iod->queue = &ctrl->queues[queue_idx];
@@ -288,9 +286,9 @@
static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
{
+ nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
blk_cleanup_queue(ctrl->ctrl.admin_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
- nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
}
static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
@@ -314,6 +312,43 @@
kfree(ctrl);
}
+static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+ int i;
+
+ for (i = 1; i < ctrl->queue_count; i++)
+ nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+}
+
+static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
+{
+ struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ unsigned int nr_io_queues;
+ int ret, i;
+
+ nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+ ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
+ if (ret || !nr_io_queues)
+ return ret;
+
+ dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);
+
+ for (i = 1; i <= nr_io_queues; i++) {
+ ctrl->queues[i].ctrl = ctrl;
+ ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+ if (ret)
+ goto out_destroy_queues;
+
+ ctrl->queue_count++;
+ }
+
+ return 0;
+
+out_destroy_queues:
+ nvme_loop_destroy_io_queues(ctrl);
+ return ret;
+}
+
static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
{
int error;
@@ -385,17 +420,13 @@
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
{
- int i;
-
nvme_stop_keep_alive(&ctrl->ctrl);
if (ctrl->queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
-
- for (i = 1; i < ctrl->queue_count; i++)
- nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+ nvme_loop_destroy_io_queues(ctrl);
}
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -467,19 +498,14 @@
if (ret)
goto out_disable;
- for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
- ctrl->queues[i].ctrl = ctrl;
- ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
- if (ret)
- goto out_free_queues;
+ ret = nvme_loop_init_io_queues(ctrl);
+ if (ret)
+ goto out_destroy_admin;
- ctrl->queue_count++;
- }
-
- for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
+ for (i = 1; i < ctrl->queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
if (ret)
- goto out_free_queues;
+ goto out_destroy_io;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -492,9 +518,9 @@
return;
-out_free_queues:
- for (i = 1; i < ctrl->queue_count; i++)
- nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+out_destroy_io:
+ nvme_loop_destroy_io_queues(ctrl);
+out_destroy_admin:
nvme_loop_destroy_admin_queue(ctrl);
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
@@ -533,25 +559,12 @@
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
{
- struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
int ret, i;
- ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
- if (ret || !opts->nr_io_queues)
+ ret = nvme_loop_init_io_queues(ctrl);
+ if (ret)
return ret;
- dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
- opts->nr_io_queues);
-
- for (i = 1; i <= opts->nr_io_queues; i++) {
- ctrl->queues[i].ctrl = ctrl;
- ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
- if (ret)
- goto out_destroy_queues;
-
- ctrl->queue_count++;
- }
-
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
@@ -575,7 +588,7 @@
goto out_free_tagset;
}
- for (i = 1; i <= opts->nr_io_queues; i++) {
+ for (i = 1; i < ctrl->queue_count; i++) {
ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
if (ret)
goto out_cleanup_connect_q;
@@ -588,8 +601,7 @@
out_free_tagset:
blk_mq_free_tag_set(&ctrl->tag_set);
out_destroy_queues:
- for (i = 1; i < ctrl->queue_count; i++)
- nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+ nvme_loop_destroy_io_queues(ctrl);
return ret;
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7655a35..26b87dc 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -73,6 +73,7 @@
u16 qid;
u16 size;
struct completion free_done;
+ struct completion confirm_done;
};
/**
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ca8ddc3..53bd325 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -703,11 +703,6 @@
{
u16 status;
- cmd->queue = queue;
- cmd->n_rdma = 0;
- cmd->req.port = queue->port;
-
-
ib_dma_sync_single_for_cpu(queue->dev->device,
cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
DMA_FROM_DEVICE);
@@ -760,9 +755,12 @@
cmd->queue = queue;
rsp = nvmet_rdma_get_rsp(queue);
+ rsp->queue = queue;
rsp->cmd = cmd;
rsp->flags = 0;
rsp->req.cmd = cmd->nvme_cmd;
+ rsp->req.port = queue->port;
+ rsp->n_rdma = 0;
if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
unsigned long flags;
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index bc286cb..1cced1d 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1656,3 +1656,36 @@
iounmap(base_addr);
}
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card which both don't have any external connectors and
+ * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as
+ * such makes those machines the only PARISC machines on which we can't use
+ * ttyS0 as boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+ dev->subsystem_device != 0x1292)
+ return;
+
+ dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+ dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+ quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+ if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+ dev->subsystem_device != 0x1291)
+ return;
+
+ dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+ dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+ quirk_diva_aux_disable);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 4722782..1d32fe2 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -164,7 +164,6 @@
pci_device_add(virtfn, virtfn->bus);
mutex_unlock(&iov->dev->sriov->lock);
- pci_bus_add_device(virtfn);
sprintf(buf, "virtfn%u", id);
rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
if (rc)
@@ -175,6 +174,8 @@
kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
+ pci_bus_add_device(virtfn);
+
return 0;
failed2:
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 8a68e2b..802997e 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -953,7 +953,12 @@
if (pci_has_legacy_pm_support(pci_dev))
return pci_legacy_resume_early(dev);
- pci_update_current_state(pci_dev, PCI_D0);
+ /*
+ * pci_restore_state() requires the device to be in D0 (because of MSI
+ * restoration among other things), so force it into D0 in case the
+ * driver's "freeze" callbacks put it into a low-power state directly.
+ */
+ pci_set_power_state(pci_dev, PCI_D0);
pci_restore_state(pci_dev);
if (drv && drv->pm && drv->pm->thaw_noirq)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e7d4048..a87c8e1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4214,6 +4214,10 @@
{
struct pci_dev *dev;
+
+ if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
+ return false;
+
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
(dev->subordinate && !pci_bus_resetable(dev->subordinate)))
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index b1303b3..057465ad 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -390,7 +390,14 @@
* If the error is reported by an end point, we think this
* error is related to the upstream link of the end point.
*/
- pci_walk_bus(dev->bus, cb, &result_data);
+ if (state == pci_channel_io_normal)
+ /*
+ * the error is non fatal so the bus is ok, just invoke
+ * the callback for the function that logged the error.
+ */
+ cb(dev, &result_data);
+ else
+ pci_walk_bus(dev->bus, cb, &result_data);
}
return result_data.result;
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index 4b70349..00f61225 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -232,6 +232,9 @@
break;
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
+ if (rtsta == (u32) ~0)
+ break;
+
if (rtsta & PCI_EXP_RTSTA_PME) {
/*
* Clear PME status of the port. If there are other
@@ -279,7 +282,7 @@
spin_lock_irqsave(&data->lock, flags);
pcie_capability_read_dword(port, PCI_EXP_RTSTA, &rtsta);
- if (!(rtsta & PCI_EXP_RTSTA_PME)) {
+ if (rtsta == (u32) ~0 || !(rtsta & PCI_EXP_RTSTA_PME)) {
spin_unlock_irqrestore(&data->lock, flags);
return IRQ_NONE;
}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 60bada9..a98be6d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -932,7 +932,8 @@
child = pci_add_new_bus(bus, dev, max+1);
if (!child)
goto out;
- pci_bus_insert_busn_res(child, max+1, 0xff);
+ pci_bus_insert_busn_res(child, max+1,
+ bus->busn_res.end);
}
max++;
buses = (buses & 0xff000000)
@@ -2136,6 +2137,10 @@
if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) {
if (max - bus->busn_res.start < pci_hotplug_bus_size - 1)
max = bus->busn_res.start + pci_hotplug_bus_size - 1;
+
+ /* Do not allocate more buses than we have room left */
+ if (max > bus->busn_res.end)
+ max = bus->busn_res.end;
}
/*
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index f9357e0..b6b9b5b 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,9 +19,9 @@
pci_pme_active(dev, false);
if (dev->is_added) {
+ device_release_driver(&dev->dev);
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
- device_release_driver(&dev->dev);
dev->is_added = 0;
}
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index a268f4d..48a365e 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -395,6 +395,10 @@
if (ret)
return ERR_PTR(-ENODEV);
+ /* This phy type handled by the usb-phy subsystem for now */
+ if (of_device_is_compatible(args.np, "usb-nop-xceiv"))
+ return ERR_PTR(-ENODEV);
+
mutex_lock(&phy_provider_mutex);
phy_provider = of_phy_provider_lookup(args.np);
if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) {
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 671610c..b0c0fa0 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -26,7 +26,8 @@
config PINCTRL_ADI2
bool "ADI pin controller driver"
- depends on BLACKFIN
+ depends on (BF54x || BF60x)
+ depends on !GPIO_ADI
select PINMUX
select IRQ_DOMAIN
help
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 0d34d8a4..e8c08eb 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1594,6 +1594,22 @@
clear_bit(i, chip->irq_valid_mask);
}
+ /*
+ * The same set of machines in chv_no_valid_mask[] have incorrectly
+ * configured GPIOs that generate spurious interrupts so we use
+ * this same list to apply another quirk for them.
+ *
+ * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+ */
+ if (!need_valid_mask) {
+ /*
+ * Mask all interrupts the community is able to generate
+ * but leave the ones that can only generate GPEs unmasked.
+ */
+ chv_writel(GENMASK(31, pctrl->community->nirqs),
+ pctrl->regs + CHV_INTMASK);
+ }
+
/* Clear all interrupts */
chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index b40a074..df63b7d 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -368,6 +368,18 @@
writel(value, padcfg0);
}
+static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
+{
+ u32 value;
+
+ /* Put the pad into GPIO mode */
+ value = readl(padcfg0) & ~PADCFG0_PMODE_MASK;
+ /* Disable SCI/SMI/NMI generation */
+ value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI);
+ value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI);
+ writel(value, padcfg0);
+}
+
static int intel_gpio_request_enable(struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range,
unsigned pin)
@@ -375,7 +387,6 @@
struct intel_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev);
void __iomem *padcfg0;
unsigned long flags;
- u32 value;
raw_spin_lock_irqsave(&pctrl->lock, flags);
@@ -385,13 +396,7 @@
}
padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0);
- /* Put the pad into GPIO mode */
- value = readl(padcfg0) & ~PADCFG0_PMODE_MASK;
- /* Disable SCI/SMI/NMI generation */
- value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI);
- value &= ~(PADCFG0_GPIROUTSMI | PADCFG0_GPIROUTNMI);
- writel(value, padcfg0);
-
+ intel_gpio_set_gpio_mode(padcfg0);
/* Disable TX buffer and enable RX (this will be input) */
__intel_gpio_set_direction(padcfg0, true);
@@ -770,6 +775,8 @@
raw_spin_lock_irqsave(&pctrl->lock, flags);
+ intel_gpio_set_gpio_mode(reg);
+
value = readl(reg);
value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV);
diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c
index b7bb371..50c45bd 100644
--- a/drivers/pinctrl/pinctrl-st.c
+++ b/drivers/pinctrl/pinctrl-st.c
@@ -1285,6 +1285,22 @@
writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK);
}
+static int st_gpio_irq_request_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+ st_gpio_direction_input(gc, d->hwirq);
+
+ return gpiochip_lock_as_irq(gc, d->hwirq);
+}
+
+static void st_gpio_irq_release_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+
+ gpiochip_unlock_as_irq(gc, d->hwirq);
+}
+
static int st_gpio_irq_set_type(struct irq_data *d, unsigned type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1438,12 +1454,14 @@
};
static struct irq_chip st_gpio_irqchip = {
- .name = "GPIO",
- .irq_disable = st_gpio_irq_mask,
- .irq_mask = st_gpio_irq_mask,
- .irq_unmask = st_gpio_irq_unmask,
- .irq_set_type = st_gpio_irq_set_type,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .name = "GPIO",
+ .irq_request_resources = st_gpio_irq_request_resources,
+ .irq_release_resources = st_gpio_irq_release_resources,
+ .irq_disable = st_gpio_irq_mask,
+ .irq_mask = st_gpio_irq_mask,
+ .irq_unmask = st_gpio_irq_unmask,
+ .irq_set_type = st_gpio_irq_set_type,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
};
static int st_gpiolib_register_bank(struct st_pinctrl *info,
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
index 866aa3c..6cf0006 100644
--- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -436,3 +436,7 @@
return 0;
}
EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit);
+
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
index 9f31bc1..1871602 100644
--- a/drivers/platform/x86/asus-wireless.c
+++ b/drivers/platform/x86/asus-wireless.c
@@ -97,6 +97,7 @@
return;
}
input_report_key(data->idev, KEY_RFKILL, 1);
+ input_sync(data->idev);
input_report_key(data->idev, KEY_RFKILL, 0);
input_sync(data->idev);
}
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 0935668..abd9d83 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -240,6 +240,7 @@
AXIS_DMI_MATCH("HDX18", "HP HDX 18", x_inverted),
AXIS_DMI_MATCH("HPB432x", "HP ProBook 432", xy_rotated_left),
AXIS_DMI_MATCH("HPB440G3", "HP ProBook 440 G3", x_inverted_usd),
+ AXIS_DMI_MATCH("HPB440G4", "HP ProBook 440 G4", x_inverted),
AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left),
AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted),
AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap),
diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c
index a47a41f..b5b8901 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel_punit_ipc.c
@@ -252,28 +252,28 @@
* - GTDRIVER_IPC BASE_IFACE
*/
res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
- if (res) {
+ if (res && resource_size(res) > 1) {
addr = devm_ioremap_resource(&pdev->dev, res);
if (!IS_ERR(addr))
punit_ipcdev->base[ISPDRIVER_IPC][BASE_DATA] = addr;
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
- if (res) {
+ if (res && resource_size(res) > 1) {
addr = devm_ioremap_resource(&pdev->dev, res);
if (!IS_ERR(addr))
punit_ipcdev->base[ISPDRIVER_IPC][BASE_IFACE] = addr;
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 4);
- if (res) {
+ if (res && resource_size(res) > 1) {
addr = devm_ioremap_resource(&pdev->dev, res);
if (!IS_ERR(addr))
punit_ipcdev->base[GTDRIVER_IPC][BASE_DATA] = addr;
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 5);
- if (res) {
+ if (res && resource_size(res) > 1) {
addr = devm_ioremap_resource(&pdev->dev, res);
if (!IS_ERR(addr))
punit_ipcdev->base[GTDRIVER_IPC][BASE_IFACE] = addr;
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index ceeb8c1..00d82e8 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -848,5 +848,5 @@
pr_info("Mapper unloaded\n");
}
-subsys_initcall(acpi_wmi_init);
+subsys_initcall_sync(acpi_wmi_init);
module_exit(acpi_wmi_exit);
diff --git a/drivers/power/reset/zx-reboot.c b/drivers/power/reset/zx-reboot.c
index b0b1eb3..76153ac 100644
--- a/drivers/power/reset/zx-reboot.c
+++ b/drivers/power/reset/zx-reboot.c
@@ -81,3 +81,7 @@
},
};
module_platform_driver(zx_reboot_driver);
+
+MODULE_DESCRIPTION("ZTE SoCs reset driver");
+MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index 9013a58..f32fc70 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -964,7 +964,8 @@
req->sgt.sgl, req->sgt.nents, dir);
if (nents == -EFAULT) {
rmcd_error("Failed to map SG list");
- return -EFAULT;
+ ret = -EFAULT;
+ goto err_pg;
}
ret = do_dma_request(req, xfer, sync, nents);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 84a52db..6ebd42a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -772,7 +772,7 @@
}
timerqueue_add(&rtc->timerqueue, &timer->node);
- if (!next) {
+ if (!next || ktime_before(timer->node.expires, next->expires)) {
struct rtc_wkalrm alarm;
int err;
alarm.time = rtc_ktime_to_tm(timer->node.expires);
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 1227cea..a4b8b60 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -422,7 +422,7 @@
return 0;
buf &= PCF8563_REG_CLKO_F_MASK;
- return clkout_rates[ret];
+ return clkout_rates[buf];
}
static long pcf8563_clkout_round_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e1687e1..a30f24c 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -308,7 +308,8 @@
dev_pm_clear_wake_irq(&adev->dev);
device_init_wakeup(&adev->dev, false);
- free_irq(adev->irq[0], ldata);
+ if (adev->irq[0])
+ free_irq(adev->irq[0], ldata);
rtc_device_unregister(ldata->rtc);
iounmap(ldata->base);
kfree(ldata);
@@ -381,12 +382,13 @@
goto out_no_rtc;
}
- if (request_irq(adev->irq[0], pl031_interrupt,
- vendor->irqflags, "rtc-pl031", ldata)) {
- ret = -EIO;
- goto out_no_irq;
+ if (adev->irq[0]) {
+ ret = request_irq(adev->irq[0], pl031_interrupt,
+ vendor->irqflags, "rtc-pl031", ldata);
+ if (ret)
+ goto out_no_irq;
+ dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
}
- dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
return 0;
out_no_irq:
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index d55e643..9b5fc50 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -576,9 +576,9 @@
};
struct qeth_ipato {
- int enabled;
- int invert4;
- int invert6;
+ bool enabled;
+ bool invert4;
+ bool invert6;
struct list_head entries;
};
@@ -969,7 +969,8 @@
int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
-int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int);
+int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
+ int extra_elems, int data_offset);
int qeth_get_elements_for_frags(struct sk_buff *);
int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
struct sk_buff *, struct qeth_hdr *, int, int, int);
@@ -1004,6 +1005,9 @@
int qeth_set_features(struct net_device *, netdev_features_t);
int qeth_recover_features(struct net_device *);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features);
/* exports for OSN */
int qeth_osn_assist(struct net_device *, void *, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 21ef802..df8f74c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,6 +19,11 @@
#include <linux/mii.h>
#include <linux/kthread.h>
#include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include <linux/skbuff.h>
+
#include <net/iucv/af_iucv.h>
#include <net/dsfield.h>
@@ -1470,9 +1475,9 @@
qeth_set_intial_options(card);
/* IP address takeover */
INIT_LIST_HEAD(&card->ipato.entries);
- card->ipato.enabled = 0;
- card->ipato.invert4 = 0;
- card->ipato.invert6 = 0;
+ card->ipato.enabled = false;
+ card->ipato.invert4 = false;
+ card->ipato.invert6 = false;
/* init QDIO stuff */
qeth_init_qdio_info(card);
INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
@@ -3837,6 +3842,7 @@
* @card: qeth card structure, to check max. elems.
* @skb: SKB address
* @extra_elems: extra elems needed, to check against max.
+ * @data_offset: range starts at skb->data + data_offset
*
* Returns the number of pages, and thus QDIO buffer elements, needed to cover
* skb data, including linear part and fragments. Checks if the result plus
@@ -3844,10 +3850,10 @@
* Note: extra_elems is not included in the returned result.
*/
int qeth_get_elements_no(struct qeth_card *card,
- struct sk_buff *skb, int extra_elems)
+ struct sk_buff *skb, int extra_elems, int data_offset)
{
int elements = qeth_get_elements_for_range(
- (addr_t)skb->data,
+ (addr_t)skb->data + data_offset,
(addr_t)skb->data + skb_headlen(skb)) +
qeth_get_elements_for_frags(skb);
@@ -6240,6 +6246,32 @@
}
EXPORT_SYMBOL_GPL(qeth_fix_features);
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ /* GSO segmentation builds skbs with
+ * a (small) linear part for the headers, and
+ * page frags for the data.
+ * Compared to a linear skb, the header-only part consumes an
+ * additional buffer element. This reduces buffer utilization, and
+ * hurts throughput. So compress small segments into one element.
+ */
+ if (netif_needs_gso(skb, features)) {
+ /* match skb_segment(): */
+ unsigned int doffset = skb->data - skb_mac_header(skb);
+ unsigned int hsize = skb_shinfo(skb)->gso_size;
+ unsigned int hroom = skb_headroom(skb);
+
+ /* linearize only if resulting skb allocations are order-0: */
+ if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
+ features &= ~NETIF_F_SG;
+ }
+
+ return vlan_features_check(skb, features);
+}
+EXPORT_SYMBOL_GPL(qeth_features_check);
+
static int __init qeth_core_init(void)
{
int rc;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 8530477..5082dfe 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -865,7 +865,7 @@
* chaining we can not send long frag lists
*/
if ((card->info.type != QETH_CARD_TYPE_IQD) &&
- !qeth_get_elements_no(card, new_skb, 0)) {
+ !qeth_get_elements_no(card, new_skb, 0, 0)) {
int lin_rc = skb_linearize(new_skb);
if (card->options.performance_stats) {
@@ -910,7 +910,8 @@
}
}
- elements = qeth_get_elements_no(card, new_skb, elements_needed);
+ elements = qeth_get_elements_no(card, new_skb, elements_needed,
+ (data_offset > 0) ? data_offset : 0);
if (!elements) {
if (data_offset >= 0)
kmem_cache_free(qeth_core_header_cache, hdr);
@@ -1084,6 +1085,7 @@
.ndo_stop = qeth_l2_stop,
.ndo_get_stats = qeth_get_stats,
.ndo_start_xmit = qeth_l2_hard_start_xmit,
+ .ndo_features_check = qeth_features_check,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l2_set_rx_mode,
.ndo_do_ioctl = qeth_l2_do_ioctl,
@@ -1128,6 +1130,7 @@
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
card->dev->hw_features = NETIF_F_SG;
card->dev->vlan_features = NETIF_F_SG;
+ card->dev->features |= NETIF_F_SG;
/* OSA 3S and earlier has no RX/TX support */
if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
card->dev->hw_features |= NETIF_F_IP_CSUM;
@@ -1140,8 +1143,6 @@
}
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
- card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
- PAGE_SIZE;
SET_NETDEV_DEV(card->dev, &card->gdev->dev);
netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT);
netif_carrier_off(card->dev);
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 26f7953..eedf9b0 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -80,7 +80,7 @@
int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
const u8 *);
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *);
+void qeth_l3_update_ipato(struct qeth_card *card);
struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 03a2619..1487f8a 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -168,8 +168,8 @@
}
}
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
- struct qeth_ipaddr *addr)
+static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
+ struct qeth_ipaddr *addr)
{
struct qeth_ipato_entry *ipatoe;
u8 addr_bits[128] = {0, };
@@ -178,6 +178,8 @@
if (!card->ipato.enabled)
return 0;
+ if (addr->type != QETH_IP_TYPE_NORMAL)
+ return 0;
qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
(addr->proto == QETH_PROT_IPV4)? 4:16);
@@ -293,8 +295,7 @@
memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
addr->ref_counter = 1;
- if (addr->type == QETH_IP_TYPE_NORMAL &&
- qeth_l3_is_addr_covered_by_ipato(card, addr)) {
+ if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
}
@@ -607,6 +608,27 @@
/*
* IP address takeover related functions
*/
+
+/**
+ * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs.
+ *
+ * Caller must hold ip_lock.
+ */
+void qeth_l3_update_ipato(struct qeth_card *card)
+{
+ struct qeth_ipaddr *addr;
+ unsigned int i;
+
+ hash_for_each(card->ip_htable, i, addr, hnode) {
+ if (addr->type != QETH_IP_TYPE_NORMAL)
+ continue;
+ if (qeth_l3_is_addr_covered_by_ipato(card, addr))
+ addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+ else
+ addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+ }
+}
+
static void qeth_l3_clear_ipato_list(struct qeth_card *card)
{
struct qeth_ipato_entry *ipatoe, *tmp;
@@ -618,6 +640,7 @@
kfree(ipatoe);
}
+ qeth_l3_update_ipato(card);
spin_unlock_bh(&card->ip_lock);
}
@@ -642,8 +665,10 @@
}
}
- if (!rc)
+ if (!rc) {
list_add_tail(&new->entry, &card->ipato.entries);
+ qeth_l3_update_ipato(card);
+ }
spin_unlock_bh(&card->ip_lock);
@@ -666,6 +691,7 @@
(proto == QETH_PROT_IPV4)? 4:16) &&
(ipatoe->mask_bits == mask_bits)) {
list_del(&ipatoe->entry);
+ qeth_l3_update_ipato(card);
kfree(ipatoe);
}
}
@@ -1416,6 +1442,7 @@
tmp->u.a4.addr = im4->multiaddr;
memcpy(tmp->mac, buf, sizeof(tmp->mac));
+ tmp->is_multicast = 1;
ipm = qeth_l3_ip_from_hash(card, tmp);
if (ipm) {
@@ -1593,7 +1620,7 @@
addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
if (!addr)
- return;
+ goto out;
spin_lock_bh(&card->ip_lock);
@@ -1607,6 +1634,7 @@
spin_unlock_bh(&card->ip_lock);
kfree(addr);
+out:
in_dev_put(in_dev);
}
@@ -1631,7 +1659,7 @@
addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
if (!addr)
- return;
+ goto out;
spin_lock_bh(&card->ip_lock);
@@ -1646,6 +1674,7 @@
spin_unlock_bh(&card->ip_lock);
kfree(addr);
+out:
in6_dev_put(in6_dev);
#endif /* CONFIG_QETH_IPV6 */
}
@@ -2609,17 +2638,13 @@
char daddr[16];
struct af_iucv_trans_hdr *iucv_hdr;
- skb_pull(skb, 14);
- card->dev->header_ops->create(skb, card->dev, 0,
- card->dev->dev_addr, card->dev->dev_addr,
- card->dev->addr_len);
- skb_pull(skb, 14);
- iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
memset(hdr, 0, sizeof(struct qeth_hdr));
hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
hdr->hdr.l3.ext_flags = 0;
- hdr->hdr.l3.length = skb->len;
+ hdr->hdr.l3.length = skb->len - ETH_HLEN;
hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
+
+ iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
memset(daddr, 0, sizeof(daddr));
daddr[0] = 0xfe;
daddr[1] = 0x80;
@@ -2823,10 +2848,7 @@
if ((card->info.type == QETH_CARD_TYPE_IQD) &&
!skb_is_nonlinear(skb)) {
new_skb = skb;
- if (new_skb->protocol == ETH_P_AF_IUCV)
- data_offset = 0;
- else
- data_offset = ETH_HLEN;
+ data_offset = ETH_HLEN;
hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
if (!hdr)
goto tx_drop;
@@ -2867,7 +2889,7 @@
*/
if ((card->info.type != QETH_CARD_TYPE_IQD) &&
((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
- (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) {
+ (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
int lin_rc = skb_linearize(new_skb);
if (card->options.performance_stats) {
@@ -2909,7 +2931,8 @@
elements = use_tso ?
qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
- qeth_get_elements_no(card, new_skb, hdr_elements);
+ qeth_get_elements_no(card, new_skb, hdr_elements,
+ (data_offset > 0) ? data_offset : 0);
if (!elements) {
if (data_offset >= 0)
kmem_cache_free(qeth_core_header_cache, hdr);
@@ -3064,6 +3087,7 @@
.ndo_stop = qeth_l3_stop,
.ndo_get_stats = qeth_get_stats,
.ndo_start_xmit = qeth_l3_hard_start_xmit,
+ .ndo_features_check = qeth_features_check,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_rx_mode = qeth_l3_set_multicast_list,
.ndo_do_ioctl = qeth_l3_do_ioctl,
@@ -3120,6 +3144,7 @@
card->dev->vlan_features = NETIF_F_SG |
NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
NETIF_F_TSO;
+ card->dev->features |= NETIF_F_SG;
}
}
} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -3145,8 +3170,8 @@
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
netif_keep_dst(card->dev);
- card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
- PAGE_SIZE;
+ netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+ PAGE_SIZE);
SET_NETDEV_DEV(card->dev, &card->gdev->dev);
netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT);
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index cffe42f..d6bdfc6 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -372,8 +372,8 @@
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
- struct qeth_ipaddr *addr;
- int i, rc = 0;
+ bool enable;
+ int rc = 0;
if (!card)
return -EINVAL;
@@ -386,25 +386,18 @@
}
if (sysfs_streq(buf, "toggle")) {
- card->ipato.enabled = (card->ipato.enabled)? 0 : 1;
- } else if (sysfs_streq(buf, "1")) {
- card->ipato.enabled = 1;
- hash_for_each(card->ip_htable, i, addr, hnode) {
- if ((addr->type == QETH_IP_TYPE_NORMAL) &&
- qeth_l3_is_addr_covered_by_ipato(card, addr))
- addr->set_flags |=
- QETH_IPA_SETIP_TAKEOVER_FLAG;
- }
- } else if (sysfs_streq(buf, "0")) {
- card->ipato.enabled = 0;
- hash_for_each(card->ip_htable, i, addr, hnode) {
- if (addr->set_flags &
- QETH_IPA_SETIP_TAKEOVER_FLAG)
- addr->set_flags &=
- ~QETH_IPA_SETIP_TAKEOVER_FLAG;
- }
- } else
+ enable = !card->ipato.enabled;
+ } else if (kstrtobool(buf, &enable)) {
rc = -EINVAL;
+ goto out;
+ }
+
+ if (card->ipato.enabled != enable) {
+ card->ipato.enabled = enable;
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_update_ipato(card);
+ spin_unlock_bh(&card->ip_lock);
+ }
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@@ -430,20 +423,27 @@
const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
+ bool invert;
int rc = 0;
if (!card)
return -EINVAL;
mutex_lock(&card->conf_mutex);
- if (sysfs_streq(buf, "toggle"))
- card->ipato.invert4 = (card->ipato.invert4)? 0 : 1;
- else if (sysfs_streq(buf, "1"))
- card->ipato.invert4 = 1;
- else if (sysfs_streq(buf, "0"))
- card->ipato.invert4 = 0;
- else
+ if (sysfs_streq(buf, "toggle")) {
+ invert = !card->ipato.invert4;
+ } else if (kstrtobool(buf, &invert)) {
rc = -EINVAL;
+ goto out;
+ }
+
+ if (card->ipato.invert4 != invert) {
+ card->ipato.invert4 = invert;
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_update_ipato(card);
+ spin_unlock_bh(&card->ip_lock);
+ }
+out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -609,20 +609,27 @@
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
+ bool invert;
int rc = 0;
if (!card)
return -EINVAL;
mutex_lock(&card->conf_mutex);
- if (sysfs_streq(buf, "toggle"))
- card->ipato.invert6 = (card->ipato.invert6)? 0 : 1;
- else if (sysfs_streq(buf, "1"))
- card->ipato.invert6 = 1;
- else if (sysfs_streq(buf, "0"))
- card->ipato.invert6 = 0;
- else
+ if (sysfs_streq(buf, "toggle")) {
+ invert = !card->ipato.invert6;
+ } else if (kstrtobool(buf, &invert)) {
rc = -EINVAL;
+ goto out;
+ }
+
+ if (card->ipato.invert6 != invert) {
+ card->ipato.invert6 = invert;
+ spin_lock_bh(&card->ip_lock);
+ qeth_l3_update_ipato(card);
+ spin_unlock_bh(&card->ip_lock);
+ }
+out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 0aeecec..e2962f1 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1416,13 +1416,13 @@
* will ensure that i/o is queisced and the card is flushed in that
* case.
*/
+ aac_free_irq(aac);
aac_fib_map_free(aac);
pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
aac->comm_addr = NULL;
aac->comm_phys = 0;
kfree(aac->queues);
aac->queues = NULL;
- aac_free_irq(aac);
kfree(aac->fsa_dev);
aac->fsa_dev = NULL;
quirks = aac_get_driver_ident(index)->quirks;
diff --git a/drivers/scsi/bfa/bfad_debugfs.c b/drivers/scsi/bfa/bfad_debugfs.c
index 8dcd8c7..05f5239 100644
--- a/drivers/scsi/bfa/bfad_debugfs.c
+++ b/drivers/scsi/bfa/bfad_debugfs.c
@@ -255,7 +255,8 @@
struct bfad_s *bfad = port->bfad;
struct bfa_s *bfa = &bfad->bfa;
struct bfa_ioc_s *ioc = &bfa->ioc;
- int addr, len, rc, i;
+ int addr, rc, i;
+ u32 len;
u32 *regbuf;
void __iomem *rb, *reg_addr;
unsigned long flags;
@@ -266,7 +267,7 @@
return PTR_ERR(kern_buf);
rc = sscanf(kern_buf, "%x:%x", &addr, &len);
- if (rc < 2) {
+ if (rc < 2 || len > (UINT_MAX >> 2)) {
printk(KERN_INFO
"bfad[%d]: %s failed to read user buf\n",
bfad->inst_no, __func__);
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 0039beb..358ec32 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1347,6 +1347,7 @@
csk, csk->state, csk->flags, csk->tid);
cxgbi_sock_free_cpl_skbs(csk);
+ cxgbi_sock_purge_write_queue(csk);
if (csk->wr_cred != csk->wr_max_cred) {
cxgbi_sock_purge_wr_queue(csk);
cxgbi_sock_reset_wr_list(csk);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index a1d6ab7..0b8db8a 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2951,7 +2951,7 @@
/* fill_cmd can't fail here, no data buffer to map. */
(void) fill_cmd(c, reset_type, h, NULL, 0, 0,
scsi3addr, TYPE_MSG);
- rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+ rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
if (rc) {
dev_warn(&h->pdev->dev, "Failed to send reset command\n");
goto out;
@@ -3686,7 +3686,7 @@
* # (integer code indicating one of several NOT READY states
* describing why a volume is to be kept offline)
*/
-static int hpsa_volume_offline(struct ctlr_info *h,
+static unsigned char hpsa_volume_offline(struct ctlr_info *h,
unsigned char scsi3addr[])
{
struct CommandList *c;
@@ -3707,7 +3707,7 @@
DEFAULT_TIMEOUT);
if (rc) {
cmd_free(h, c);
- return 0;
+ return HPSA_VPD_LV_STATUS_UNSUPPORTED;
}
sense = c->err_info->SenseInfo;
if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
@@ -3718,19 +3718,13 @@
cmd_status = c->err_info->CommandStatus;
scsi_status = c->err_info->ScsiStatus;
cmd_free(h, c);
- /* Is the volume 'not ready'? */
- if (cmd_status != CMD_TARGET_STATUS ||
- scsi_status != SAM_STAT_CHECK_CONDITION ||
- sense_key != NOT_READY ||
- asc != ASC_LUN_NOT_READY) {
- return 0;
- }
/* Determine the reason for not ready state */
ldstat = hpsa_get_volume_status(h, scsi3addr);
/* Keep volume offline in certain cases: */
switch (ldstat) {
+ case HPSA_LV_FAILED:
case HPSA_LV_UNDERGOING_ERASE:
case HPSA_LV_NOT_AVAILABLE:
case HPSA_LV_UNDERGOING_RPI:
@@ -3752,7 +3746,7 @@
default:
break;
}
- return 0;
+ return HPSA_LV_OK;
}
/*
@@ -3825,10 +3819,10 @@
/* Do an inquiry to the device to see what it is. */
if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
- /* Inquiry failed (msg printed already) */
dev_err(&h->pdev->dev,
- "hpsa_update_device_info: inquiry failed\n");
- rc = -EIO;
+ "%s: inquiry failed, device will be skipped.\n",
+ __func__);
+ rc = HPSA_INQUIRY_FAILED;
goto bail_out;
}
@@ -3857,15 +3851,20 @@
if ((this_device->devtype == TYPE_DISK ||
this_device->devtype == TYPE_ZBC) &&
is_logical_dev_addr_mode(scsi3addr)) {
- int volume_offline;
+ unsigned char volume_offline;
hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level);
if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
hpsa_get_ioaccel_status(h, scsi3addr, this_device);
volume_offline = hpsa_volume_offline(h, scsi3addr);
- if (volume_offline < 0 || volume_offline > 0xff)
- volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED;
- this_device->volume_offline = volume_offline & 0xff;
+ this_device->volume_offline = volume_offline;
+ if (volume_offline == HPSA_LV_FAILED) {
+ rc = HPSA_LV_FAILED;
+ dev_err(&h->pdev->dev,
+ "%s: LV failed, device will be skipped.\n",
+ __func__);
+ goto bail_out;
+ }
} else {
this_device->raid_level = RAID_UNKNOWN;
this_device->offload_config = 0;
@@ -4353,8 +4352,7 @@
goto out;
}
if (rc) {
- dev_warn(&h->pdev->dev,
- "Inquiry failed, skipping device.\n");
+ h->drv_req_rescan = 1;
continue;
}
@@ -5532,7 +5530,7 @@
spin_lock_irqsave(&h->scan_lock, flags);
h->scan_finished = 1;
- wake_up_all(&h->scan_wait_queue);
+ wake_up(&h->scan_wait_queue);
spin_unlock_irqrestore(&h->scan_lock, flags);
}
@@ -5550,11 +5548,23 @@
if (unlikely(lockup_detected(h)))
return hpsa_scan_complete(h);
+ /*
+ * If a scan is already waiting to run, no need to add another
+ */
+ spin_lock_irqsave(&h->scan_lock, flags);
+ if (h->scan_waiting) {
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+
/* wait until any scan already in progress is finished. */
while (1) {
spin_lock_irqsave(&h->scan_lock, flags);
if (h->scan_finished)
break;
+ h->scan_waiting = 1;
spin_unlock_irqrestore(&h->scan_lock, flags);
wait_event(h->scan_wait_queue, h->scan_finished);
/* Note: We don't need to worry about a race between this
@@ -5564,6 +5574,7 @@
*/
}
h->scan_finished = 0; /* mark scan as in progress */
+ h->scan_waiting = 0;
spin_unlock_irqrestore(&h->scan_lock, flags);
if (unlikely(lockup_detected(h)))
@@ -8802,6 +8813,7 @@
init_waitqueue_head(&h->event_sync_wait_queue);
mutex_init(&h->reset_mutex);
h->scan_finished = 1; /* no scan currently in progress */
+ h->scan_waiting = 0;
pci_set_drvdata(pdev, h);
h->ndevices = 0;
@@ -9094,6 +9106,8 @@
destroy_workqueue(h->rescan_ctlr_wq);
destroy_workqueue(h->resubmit_wq);
+ hpsa_delete_sas_host(h);
+
/*
* Call before disabling interrupts.
* scsi_remove_host can trigger I/O operations especially
@@ -9128,8 +9142,6 @@
h->lockup_detected = NULL; /* init_one 2 */
/* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */
- hpsa_delete_sas_host(h);
-
kfree(h); /* init_one 1 */
}
@@ -9621,9 +9633,9 @@
struct sas_phy *phy = hpsa_sas_phy->phy;
sas_port_delete_phy(hpsa_sas_phy->parent_port->port, phy);
- sas_phy_free(phy);
if (hpsa_sas_phy->added_to_port)
list_del(&hpsa_sas_phy->phy_list_entry);
+ sas_phy_delete(phy);
kfree(hpsa_sas_phy);
}
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 9ea162d..e16f294 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -203,6 +203,7 @@
dma_addr_t errinfo_pool_dhandle;
unsigned long *cmd_pool_bits;
int scan_finished;
+ u8 scan_waiting : 1;
spinlock_t scan_lock;
wait_queue_head_t scan_wait_queue;
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index a584cdf..5961705 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -156,6 +156,7 @@
#define CFGTBL_BusType_Fibre2G 0x00000200l
/* VPD Inquiry types */
+#define HPSA_INQUIRY_FAILED 0x02
#define HPSA_VPD_SUPPORTED_PAGES 0x00
#define HPSA_VPD_LV_DEVICE_ID 0x83
#define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1
@@ -166,6 +167,7 @@
/* Logical volume states */
#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff
#define HPSA_LV_OK 0x0
+#define HPSA_LV_FAILED 0x01
#define HPSA_LV_NOT_AVAILABLE 0x0b
#define HPSA_LV_UNDERGOING_ERASE 0x0F
#define HPSA_LV_UNDERGOING_RPI 0x12
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index a530f08..4abd3fc 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1727,7 +1727,7 @@
if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
reason = FAILURE_SESSION_IN_RECOVERY;
- sc->result = DID_REQUEUE;
+ sc->result = DID_REQUEUE << 16;
goto fault;
}
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 4df3cdc..fc7adda 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -7782,7 +7782,8 @@
did, vport->port_state, ndlp->nlp_flag);
phba->fc_stat.elsRcvPRLI++;
- if (vport->port_state < LPFC_DISC_AUTH) {
+ if ((vport->port_state < LPFC_DISC_AUTH) &&
+ (vport->fc_flag & FC_FABRIC)) {
rjt_err = LSRJT_UNABLE_TPC;
rjt_exp = LSEXP_NOTHING_MORE;
break;
@@ -8185,11 +8186,17 @@
spin_lock_irq(shost->host_lock);
vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
spin_unlock_irq(shost->host_lock);
- if (vport->port_type == LPFC_PHYSICAL_PORT
- && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
- lpfc_issue_init_vfi(vport);
- else
+ if (mb->mbxStatus == MBX_NOT_FINISHED)
+ break;
+ if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+ !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ lpfc_issue_init_vfi(vport);
+ else
+ lpfc_initial_flogi(vport);
+ } else {
lpfc_initial_fdisc(vport);
+ }
break;
}
} else {
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index ed22393..7d2ad63 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4784,7 +4784,8 @@
lpfc_cancel_retry_delay_tmo(vport, ndlp);
if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
!(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
- !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
+ !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
+ phba->sli_rev != LPFC_SLI_REV4) {
/* For this case we need to cleanup the default rpi
* allocated by the firmware.
*/
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 55faa94..2a436df 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -3232,7 +3232,7 @@
#define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4
#define MB_CQE_STATUS_DMA_FAILED 0x5
-#define LPFC_MBX_WR_CONFIG_MAX_BDE 8
+#define LPFC_MBX_WR_CONFIG_MAX_BDE 1
struct lpfc_mbx_wr_object {
struct mbox_header header;
union {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 289374c..468acab 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4770,6 +4770,11 @@
} else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
scmd->result = DID_RESET << 16;
break;
+ } else if ((scmd->device->channel == RAID_CHANNEL) &&
+ (scsi_state == (MPI2_SCSI_STATE_TERMINATED |
+ MPI2_SCSI_STATE_NO_SCSI_STATUS))) {
+ scmd->result = DID_RESET << 16;
+ break;
}
scmd->result = DID_SOFT_ERROR << 16;
break;
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 658e4d1..ce4ac76 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -2707,13 +2707,9 @@
"%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size);
ql_dbg(level, vha, id,
"----- -----------------------------------------------\n");
- for (cnt = 0; cnt < size; cnt++, buf++) {
- if (cnt % 16 == 0)
- ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
- printk(" %02x", *buf);
- if (cnt % 16 == 15)
- printk("\n");
+ for (cnt = 0; cnt < size; cnt += 16) {
+ ql_dbg(level, vha, id, "%04x: ", cnt);
+ print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+ buf + cnt, min(16U, size - cnt), false);
}
- if (cnt % 16 != 0)
- printk("\n");
}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 91f5f55..59059ff 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -668,11 +668,9 @@
{
struct qla_hw_data *ha = vha->hw;
struct qla_tgt_sess *sess = NULL;
- uint32_t unpacked_lun, lun = 0;
uint16_t loop_id;
int res = 0;
struct imm_ntfy_from_isp *n = (struct imm_ntfy_from_isp *)iocb;
- struct atio_from_isp *a = (struct atio_from_isp *)iocb;
unsigned long flags;
loop_id = le16_to_cpu(n->u.isp24.nport_handle);
@@ -725,11 +723,7 @@
"loop_id %d)\n", vha->host_no, sess, sess->port_name,
mcmd, loop_id);
- lun = a->u.isp24.fcp_cmnd.lun;
- unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
-
- return qlt_issue_task_mgmt(sess, unpacked_lun, mcmd,
- iocb, QLA24XX_MGMT_SEND_NACK);
+ return qlt_issue_task_mgmt(sess, 0, mcmd, iocb, QLA24XX_MGMT_SEND_NACK);
}
/* ha->tgt.sess_lock supposed to be held on entry */
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index cf04a36..2b0e615 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2996,11 +2996,11 @@
if (-1 == ret) {
write_unlock_irqrestore(&atomic_rw, iflags);
return DID_ERROR << 16;
- } else if (sdebug_verbose && (ret < (num * sdebug_sector_size)))
+ } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size))
sdev_printk(KERN_INFO, scp->device,
- "%s: %s: cdb indicated=%u, IO sent=%d bytes\n",
+ "%s: %s: lb size=%u, IO sent=%d bytes\n",
my_name, "write same",
- num * sdebug_sector_size, ret);
+ sdebug_sector_size, ret);
/* Copy first sector to remaining blocks */
for (i = 1 ; i < num ; i++)
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 2464569..26e6b05 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -160,7 +160,7 @@
{"DGC", "RAID", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, storage on LUN 0 */
{"DGC", "DISK", NULL, BLIST_SPARSELUN}, /* Dell PV 650F, no storage on LUN 0 */
{"EMC", "Invista", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
- {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
+ {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_REPORTLUN2},
{"EMULEX", "MD21/S2 ESDI", NULL, BLIST_SINGLELUN},
{"easyRAID", "16P", NULL, BLIST_NOREPORTLUN},
{"easyRAID", "X6P", NULL, BLIST_NOREPORTLUN},
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d8099c7..c7b7700 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2041,11 +2041,13 @@
q->limits.cluster = 0;
/*
- * set a reasonable default alignment on word boundaries: the
- * host and device may alter it using
- * blk_queue_update_dma_alignment() later.
+ * Set a reasonable default alignment: The larger of 32-byte (dword),
+ * which is a common minimum for HBAs, and the minimum DMA alignment,
+ * which is set by the platform.
+ *
+ * Devices that require a bigger alignment can increase it later.
*/
- blk_queue_dma_alignment(q, 0x03);
+ blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment()) - 1);
}
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 09fa1fd..ace56c5 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -234,11 +234,15 @@
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
+ bool v;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- sdp->manage_start_stop = simple_strtoul(buf, NULL, 10);
+ if (kstrtobool(buf, &v))
+ return -EINVAL;
+
+ sdp->manage_start_stop = v;
return count;
}
@@ -256,6 +260,7 @@
allow_restart_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
+ bool v;
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
@@ -265,7 +270,10 @@
if (sdp->type != TYPE_DISK)
return -EINVAL;
- sdp->allow_restart = simple_strtoul(buf, NULL, 10);
+ if (kstrtobool(buf, &v))
+ return -EINVAL;
+
+ sdp->allow_restart = v;
return count;
}
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 184c7db..cd9537d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -149,7 +149,6 @@
struct list_head rq_list; /* head of request list */
struct fasync_struct *async_qp; /* used by asynchronous notification */
Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */
- char low_dma; /* as in parent but possibly overridden to 1 */
char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */
char cmd_q; /* 1 -> allow command queuing, 0 -> don't */
unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */
@@ -922,24 +921,14 @@
/* strange ..., for backward compatibility */
return sfp->timeout_user;
case SG_SET_FORCE_LOW_DMA:
- result = get_user(val, ip);
- if (result)
- return result;
- if (val) {
- sfp->low_dma = 1;
- if ((0 == sfp->low_dma) && !sfp->res_in_use) {
- val = (int) sfp->reserve.bufflen;
- sg_remove_scat(sfp, &sfp->reserve);
- sg_build_reserve(sfp, val);
- }
- } else {
- if (atomic_read(&sdp->detaching))
- return -ENODEV;
- sfp->low_dma = sdp->device->host->unchecked_isa_dma;
- }
+ /*
+ * N.B. This ioctl never worked properly, but failed to
+ * return an error value. So returning '0' to keep compability
+ * with legacy applications.
+ */
return 0;
case SG_GET_LOW_DMA:
- return put_user((int) sfp->low_dma, ip);
+ return put_user((int) sdp->device->host->unchecked_isa_dma, ip);
case SG_GET_SCSI_ID:
if (!access_ok(VERIFY_WRITE, p, sizeof (sg_scsi_id_t)))
return -EFAULT;
@@ -1860,6 +1849,7 @@
int sg_tablesize = sfp->parentdp->sg_tablesize;
int blk_size = buff_size, order;
gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
+ struct sg_device *sdp = sfp->parentdp;
if (blk_size < 0)
return -EFAULT;
@@ -1885,7 +1875,7 @@
scatter_elem_sz_prev = num;
}
- if (sfp->low_dma)
+ if (sdp->device->host->unchecked_isa_dma)
gfp_mask |= GFP_DMA;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
@@ -2148,8 +2138,6 @@
sfp->timeout = SG_DEFAULT_TIMEOUT;
sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER;
sfp->force_packid = SG_DEF_FORCE_PACK_ID;
- sfp->low_dma = (SG_DEF_FORCE_LOW_DMA == 0) ?
- sdp->device->host->unchecked_isa_dma : 1;
sfp->cmd_q = SG_DEF_COMMAND_Q;
sfp->keep_orphan = SG_DEF_KEEP_ORPHAN;
sfp->parentdp = sdp;
@@ -2608,7 +2596,7 @@
jiffies_to_msecs(fp->timeout),
fp->reserve.bufflen,
(int) fp->reserve.k_use_sg,
- (int) fp->low_dma);
+ (int) sdp->device->host->unchecked_isa_dma);
seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n",
(int) fp->cmd_q, (int) fp->force_packid,
(int) fp->keep_orphan);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index f0a015a..fb4dafb 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3622,10 +3622,10 @@
completion = ktime_get();
delta_us = ktime_us_delta(completion,
req->lat_hist_io_start);
- /* rq_data_dir() => true if WRITE */
- blk_update_latency_hist(&hba->io_lat_s,
- (rq_data_dir(req) == READ),
- delta_us);
+ blk_update_latency_hist(
+ (rq_data_dir(req) == READ) ?
+ &hba->io_lat_read :
+ &hba->io_lat_write, delta_us);
}
}
/* Do not touch lrbp after scsi done */
@@ -5356,12 +5356,15 @@
struct ufs_vreg *vreg, bool on)
{
int ret = 0;
- struct regulator *reg = vreg->reg;
- const char *name = vreg->name;
+ struct regulator *reg;
+ const char *name;
int min_uV, uA_load;
BUG_ON(!vreg);
+ reg = vreg->reg;
+ name = vreg->name;
+
if (regulator_count_voltages(reg) > 0) {
min_uV = on ? vreg->min_uV : 0;
ret = regulator_set_voltage(reg, min_uV, vreg->max_uV);
@@ -6371,9 +6374,10 @@
if (kstrtol(buf, 0, &value))
return -EINVAL;
- if (value == BLK_IO_LAT_HIST_ZERO)
- blk_zero_latency_hist(&hba->io_lat_s);
- else if (value == BLK_IO_LAT_HIST_ENABLE ||
+ if (value == BLK_IO_LAT_HIST_ZERO) {
+ memset(&hba->io_lat_read, 0, sizeof(hba->io_lat_read));
+ memset(&hba->io_lat_write, 0, sizeof(hba->io_lat_write));
+ } else if (value == BLK_IO_LAT_HIST_ENABLE ||
value == BLK_IO_LAT_HIST_DISABLE)
hba->latency_hist_enabled = value;
return count;
@@ -6384,8 +6388,14 @@
char *buf)
{
struct ufs_hba *hba = dev_get_drvdata(dev);
+ size_t written_bytes;
- return blk_latency_hist_show(&hba->io_lat_s, buf);
+ written_bytes = blk_latency_hist_show("Read", &hba->io_lat_read,
+ buf, PAGE_SIZE);
+ written_bytes += blk_latency_hist_show("Write", &hba->io_lat_write,
+ buf + written_bytes, PAGE_SIZE - written_bytes);
+
+ return written_bytes;
}
static DEVICE_ATTR(latency_hist, S_IRUGO | S_IWUSR,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index dbe68b2..b35a5b9 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -564,7 +564,8 @@
enum bkops_status urgent_bkops_lvl;
bool is_urgent_bkops_lvl_checked;
int latency_hist_enabled;
- struct io_latency_state io_lat_s;
+ struct io_latency_state io_lat_read;
+ struct io_latency_state io_lat_write;
};
/* Returns true if clocks can be gated. Otherwise false */
diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c
index a5f1093..e929f51 100644
--- a/drivers/soc/mediatek/mtk-pmic-wrap.c
+++ b/drivers/soc/mediatek/mtk-pmic-wrap.c
@@ -522,7 +522,7 @@
u32 int_en_all;
u32 spi_w;
u32 wdt_src;
- int has_bridge:1;
+ unsigned int has_bridge:1;
int (*init_reg_clock)(struct pmic_wrapper *wrp);
int (*init_soc_specific)(struct pmic_wrapper *wrp);
};
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8e281e4..b799547 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -365,7 +365,6 @@
config SPI_FSL_DSPI
tristate "Freescale DSPI controller"
select REGMAP_MMIO
- depends on HAS_DMA
depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
help
This enables support for the Freescale DSPI controller in master
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index c1eafbd..da51fed 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -553,7 +553,7 @@
static int spi_engine_remove(struct platform_device *pdev)
{
- struct spi_master *master = platform_get_drvdata(pdev);
+ struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
struct spi_engine *spi_engine = spi_master_get_devdata(master);
int irq = platform_get_irq(pdev, 0);
@@ -561,6 +561,8 @@
free_irq(irq, master);
+ spi_master_put(master);
+
writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);
writel_relaxed(0x01, spi_engine->base + SPI_ENGINE_REG_RESET);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index deb782f..a6e34f0 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1307,12 +1307,23 @@
{
struct spi_master *master = platform_get_drvdata(pdev);
struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
+ int ret;
spi_bitbang_stop(&spi_imx->bitbang);
+ ret = clk_enable(spi_imx->clk_per);
+ if (ret)
+ return ret;
+
+ ret = clk_enable(spi_imx->clk_ipg);
+ if (ret) {
+ clk_disable(spi_imx->clk_per);
+ return ret;
+ }
+
writel(0, spi_imx->base + MXC_CSPICTRL);
- clk_unprepare(spi_imx->clk_ipg);
- clk_unprepare(spi_imx->clk_per);
+ clk_disable_unprepare(spi_imx->clk_ipg);
+ clk_disable_unprepare(spi_imx->clk_per);
spi_imx_sdma_exit(spi_imx);
spi_master_put(master);
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 1de3a77..cbf02eb 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -862,7 +862,7 @@
break;
copy32 = copy_bswap32;
} else if (bits <= 16) {
- if (l & 1)
+ if (l & 3)
break;
copy32 = copy_wswap32;
} else {
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index bc7100b..e0b9fe1 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -271,6 +271,7 @@
while (remaining_words) {
int n_words, tx_words, rx_words;
u32 sr;
+ int stalled;
n_words = min(remaining_words, xspi->buffer_size);
@@ -299,7 +300,17 @@
/* Read out all the data from the Rx FIFO */
rx_words = n_words;
+ stalled = 10;
while (rx_words) {
+ if (rx_words == n_words && !(stalled--) &&
+ !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+ (sr & XSPI_SR_RX_EMPTY_MASK)) {
+ dev_err(&spi->dev,
+ "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+ xspi_init_hw(xspi);
+ return -EIO;
+ }
+
if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
xilinx_spi_rx(xspi);
rx_words--;
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 9435dc5..eb41e84 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -766,10 +766,12 @@
break;
case ASHMEM_SET_SIZE:
ret = -EINVAL;
+ mutex_lock(&ashmem_mutex);
if (!asma->file) {
ret = 0;
asma->size = (size_t)arg;
}
+ mutex_unlock(&ashmem_mutex);
break;
case ASHMEM_GET_SIZE:
ret = asma->size;
diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c
index 8dffd8a..9f01427 100644
--- a/drivers/staging/greybus/light.c
+++ b/drivers/staging/greybus/light.c
@@ -924,6 +924,8 @@
return;
led_classdev_unregister(cdev);
+ kfree(cdev->name);
+ cdev->name = NULL;
channel->led = NULL;
}
diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c
index 29dc249..3c2c233 100644
--- a/drivers/staging/greybus/loopback.c
+++ b/drivers/staging/greybus/loopback.c
@@ -1034,8 +1034,10 @@
error = gb_loopback_async_sink(gb, size);
}
- if (error)
+ if (error) {
gb->error++;
+ gb->iteration_count++;
+ }
} else {
/* We are effectively single threaded here */
if (type == GB_LOOPBACK_TYPE_PING)
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 9e88021..e8d9db4 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -824,14 +824,15 @@
return conn;
failed_2:
- kiblnd_destroy_conn(conn, true);
+ kiblnd_destroy_conn(conn);
+ LIBCFS_FREE(conn, sizeof(*conn));
failed_1:
LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
failed_0:
return NULL;
}
-void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn)
+void kiblnd_destroy_conn(struct kib_conn *conn)
{
struct rdma_cm_id *cmid = conn->ibc_cmid;
struct kib_peer *peer = conn->ibc_peer;
@@ -894,8 +895,6 @@
rdma_destroy_id(cmid);
atomic_dec(&net->ibn_nconns);
}
-
- LIBCFS_FREE(conn, sizeof(*conn));
}
int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why)
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 1457697..30cb2f5 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -1018,7 +1018,7 @@
struct kib_conn *kiblnd_create_conn(struct kib_peer *peer,
struct rdma_cm_id *cmid,
int state, int version);
-void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn);
+void kiblnd_destroy_conn(struct kib_conn *conn);
void kiblnd_close_conn(struct kib_conn *conn, int error);
void kiblnd_close_conn_locked(struct kib_conn *conn, int error);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 995f2da..ea9a0c2 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -3323,11 +3323,13 @@
spin_unlock_irqrestore(lock, flags);
dropped_lock = 1;
- kiblnd_destroy_conn(conn, !peer);
+ kiblnd_destroy_conn(conn);
spin_lock_irqsave(lock, flags);
- if (!peer)
+ if (!peer) {
+ kfree(conn);
continue;
+ }
conn->ibc_peer = peer;
if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 4366918..27333d9 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -401,15 +401,13 @@
result = VM_FAULT_LOCKED;
break;
case -ENODATA:
+ case -EAGAIN:
case -EFAULT:
result = VM_FAULT_NOPAGE;
break;
case -ENOMEM:
result = VM_FAULT_OOM;
break;
- case -EAGAIN:
- result = VM_FAULT_RETRY;
- break;
default:
result = VM_FAULT_SIGBUS;
break;
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index 499d7bf..75e6d5e 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -608,8 +608,7 @@
}
memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len);
if (msg->len == 1) {
- if (cec_msg_initiator(msg) != 0xf ||
- cec_msg_destination(msg) == 0xf) {
+ if (cec_msg_destination(msg) == 0xf) {
dprintk(1, "cec_transmit_msg: invalid poll message\n");
return -EINVAL;
}
@@ -634,7 +633,7 @@
dprintk(1, "cec_transmit_msg: destination is the adapter itself\n");
return -EINVAL;
}
- if (cec_msg_initiator(msg) != 0xf &&
+ if (msg->len > 1 && adap->is_configured &&
!cec_has_log_addr(adap, cec_msg_initiator(msg))) {
dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n",
cec_msg_initiator(msg));
@@ -883,7 +882,7 @@
/* Send poll message */
msg.len = 1;
- msg.msg[0] = 0xf0 | log_addr;
+ msg.msg[0] = (log_addr << 4) | log_addr;
err = cec_transmit_msg_fh(adap, &msg, NULL, true);
/*
diff --git a/drivers/staging/rtl8188eu/core/rtw_cmd.c b/drivers/staging/rtl8188eu/core/rtw_cmd.c
index f1f4788..6051a7b 100644
--- a/drivers/staging/rtl8188eu/core/rtw_cmd.c
+++ b/drivers/staging/rtl8188eu/core/rtw_cmd.c
@@ -342,7 +342,7 @@
else
RT_TRACE(_module_rtl871x_cmd_c_, _drv_info_, (" createbss for SSid:%s\n", pmlmepriv->assoc_ssid.Ssid));
- pcmd = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL);
+ pcmd = kzalloc(sizeof(struct cmd_obj), GFP_ATOMIC);
if (!pcmd) {
res = _FAIL;
goto exit;
@@ -522,7 +522,7 @@
if (enqueue) {
/* need enqueue, prepare cmd_obj and enqueue */
- cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL);
+ cmdobj = kzalloc(sizeof(*cmdobj), GFP_ATOMIC);
if (!cmdobj) {
res = _FAIL;
kfree(param);
diff --git a/drivers/staging/rtl8188eu/core/rtw_mlme.c b/drivers/staging/rtl8188eu/core/rtw_mlme.c
index ee2dcd0..0b60d1e 100644
--- a/drivers/staging/rtl8188eu/core/rtw_mlme.c
+++ b/drivers/staging/rtl8188eu/core/rtw_mlme.c
@@ -107,10 +107,10 @@
void rtw_free_mlme_priv(struct mlme_priv *pmlmepriv)
{
- rtw_free_mlme_priv_ie_data(pmlmepriv);
-
- if (pmlmepriv)
+ if (pmlmepriv) {
+ rtw_free_mlme_priv_ie_data(pmlmepriv);
vfree(pmlmepriv->free_bss_buf);
+ }
}
struct wlan_network *_rtw_alloc_network(struct mlme_priv *pmlmepriv)
diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
index 4de9dbc..c7bf8ab 100644
--- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
@@ -1397,19 +1397,13 @@
if ((check_fwstate(pmlmepriv, _FW_LINKED)) ||
(check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE))) {
len = pcur_bss->Ssid.SsidLength;
-
- wrqu->essid.length = len;
-
memcpy(extra, pcur_bss->Ssid.Ssid, len);
-
- wrqu->essid.flags = 1;
} else {
- ret = -1;
- goto exit;
+ len = 0;
+ *extra = 0;
}
-
-exit:
-
+ wrqu->essid.length = len;
+ wrqu->essid.flags = 1;
return ret;
}
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index f109eeac..ab96629 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1698,10 +1698,11 @@
MACbShutdown(priv);
pci_disable_device(pcid);
- pci_set_power_state(pcid, pci_choose_state(pcid, state));
spin_unlock_irqrestore(&priv->lock, flags);
+ pci_set_power_state(pcid, pci_choose_state(pcid, state));
+
return 0;
}
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 0d57829..04d2b6e 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -841,6 +841,7 @@
unsigned char *buf)
{
struct iscsi_conn *conn;
+ const bool do_put = cmd->se_cmd.se_tfo != NULL;
if (!cmd->conn) {
pr_err("cmd->conn is NULL for ITT: 0x%08x\n",
@@ -871,7 +872,7 @@
* Perform the kref_put now if se_cmd has already been setup by
* scsit_setup_scsi_cmd()
*/
- if (cmd->se_cmd.se_tfo != NULL) {
+ if (do_put) {
pr_debug("iscsi reject: calling target_put_sess_cmd >>>>>>\n");
target_put_sess_cmd(&cmd->se_cmd);
}
@@ -1939,7 +1940,6 @@
struct iscsi_tmr_req *tmr_req;
struct iscsi_tm *hdr;
int out_of_order_cmdsn = 0, ret;
- bool sess_ref = false;
u8 function, tcm_function = TMR_UNKNOWN;
hdr = (struct iscsi_tm *) buf;
@@ -1981,18 +1981,17 @@
buf);
}
+ transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
+ conn->sess->se_sess, 0, DMA_NONE,
+ TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
+
+ target_get_sess_cmd(&cmd->se_cmd, true);
+
/*
* TASK_REASSIGN for ERL=2 / connection stays inside of
* LIO-Target $FABRIC_MOD
*/
if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
- transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
- conn->sess->se_sess, 0, DMA_NONE,
- TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
-
- target_get_sess_cmd(&cmd->se_cmd, true);
- sess_ref = true;
-
switch (function) {
case ISCSI_TM_FUNC_ABORT_TASK:
tcm_function = TMR_ABORT_TASK;
@@ -2131,12 +2130,8 @@
* For connection recovery, this is also the default action for
* TMR TASK_REASSIGN.
*/
- if (sess_ref) {
- pr_debug("Handle TMR, using sess_ref=true check\n");
- target_put_sess_cmd(&cmd->se_cmd);
- }
-
iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ target_put_sess_cmd(&cmd->se_cmd);
return 0;
}
EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 9cbbc9c..8a4bc15 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1144,7 +1144,7 @@
ret = core_tpg_register(wwn, &tpg->tpg_se_tpg, SCSI_PROTOCOL_ISCSI);
if (ret < 0)
- return NULL;
+ goto free_out;
ret = iscsit_tpg_add_portal_group(tiqn, tpg);
if (ret != 0)
@@ -1156,6 +1156,7 @@
return &tpg->tpg_se_tpg;
out:
core_tpg_deregister(&tpg->tpg_se_tpg);
+free_out:
kfree(tpg);
return NULL;
}
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 4c82bbe..ee5b29a 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1010,7 +1010,7 @@
static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
{
struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
- struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work);
+ struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
@@ -1073,17 +1073,8 @@
/*
* Flush any pending transitions
*/
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs &&
- atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) ==
- ALUA_ACCESS_STATE_TRANSITION) {
- /* Just in case */
- tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
- tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
- wait_for_completion(&wait);
- tg_pt_gp->tg_pt_gp_transition_complete = NULL;
- return 0;
- }
+ if (!explicit)
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Save the old primary ALUA access state, and set the current state
@@ -1114,17 +1105,9 @@
atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
- unsigned long transition_tmo;
-
- transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work,
- transition_tmo);
- } else {
+ schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ if (explicit) {
tg_pt_gp->tg_pt_gp_transition_complete = &wait;
- queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq,
- &tg_pt_gp->tg_pt_gp_transition_work, 0);
wait_for_completion(&wait);
tg_pt_gp->tg_pt_gp_transition_complete = NULL;
}
@@ -1692,8 +1675,8 @@
mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
- INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
- core_alua_do_transition_tg_pt_work);
+ INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
+ core_alua_do_transition_tg_pt_work);
tg_pt_gp->tg_pt_gp_dev = dev;
atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
@@ -1801,7 +1784,7 @@
dev->t10_alua.alua_tg_pt_gps_counter--;
spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
- flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work);
+ flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
/*
* Allow a struct t10_alua_tg_pt_gp_member * referenced by
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 29f807b..97928b4 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -466,6 +466,10 @@
struct inode *inode = file->f_mapping->host;
int ret;
+ if (!nolb) {
+ return 0;
+ }
+
if (cmd->se_dev->dev_attrib.pi_prot_type) {
ret = fd_do_prot_unmap(cmd, lba, nolb);
if (ret)
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 47463c9..df20921 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -56,8 +56,10 @@
char *buf,
u32 size)
{
- if (!pr_reg->isid_present_at_reg)
+ if (!pr_reg->isid_present_at_reg) {
buf[0] = '\0';
+ return;
+ }
snprintf(buf, size, ",i,0x%s", pr_reg->pr_reg_isid);
}
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 27dd1e1..14bb2db 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -133,6 +133,15 @@
spin_unlock(&se_cmd->t_state_lock);
return false;
}
+ if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
+ if (se_cmd->scsi_status) {
+ pr_debug("Attempted to abort io tag: %llu early failure"
+ " status: 0x%02x\n", se_cmd->tag,
+ se_cmd->scsi_status);
+ spin_unlock(&se_cmd->t_state_lock);
+ return false;
+ }
+ }
if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
pr_debug("Attempted to abort io tag: %llu already shutdown,"
" skipping\n", se_cmd->tag);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4c0782c..6f3eccf 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1939,6 +1939,7 @@
}
cmd->t_state = TRANSPORT_PROCESSING;
+ cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
spin_unlock_irq(&cmd->t_state_lock);
@@ -2592,6 +2593,7 @@
ret = -ESHUTDOWN;
goto out;
}
+ se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
out:
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
diff --git a/drivers/tee/Kconfig b/drivers/tee/Kconfig
new file mode 100644
index 0000000..a6df12d
--- /dev/null
+++ b/drivers/tee/Kconfig
@@ -0,0 +1,19 @@
+# Generic Trusted Execution Environment Configuration
+config TEE
+ tristate "Trusted Execution Environment support"
+ depends on HAVE_ARM_SMCCC || COMPILE_TEST
+ select DMA_SHARED_BUFFER
+ select GENERIC_ALLOCATOR
+ help
+ This implements a generic interface towards a Trusted Execution
+ Environment (TEE).
+
+if TEE
+
+menu "TEE drivers"
+
+source "drivers/tee/optee/Kconfig"
+
+endmenu
+
+endif
diff --git a/drivers/tee/Makefile b/drivers/tee/Makefile
new file mode 100644
index 0000000..7a4e4a1
--- /dev/null
+++ b/drivers/tee/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_TEE) += tee.o
+tee-objs += tee_core.o
+tee-objs += tee_shm.o
+tee-objs += tee_shm_pool.o
+obj-$(CONFIG_OPTEE) += optee/
diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig
new file mode 100644
index 0000000..0126de8
--- /dev/null
+++ b/drivers/tee/optee/Kconfig
@@ -0,0 +1,7 @@
+# OP-TEE Trusted Execution Environment Configuration
+config OPTEE
+ tristate "OP-TEE"
+ depends on HAVE_ARM_SMCCC
+ help
+ This implements the OP-TEE Trusted Execution Environment (TEE)
+ driver.
diff --git a/drivers/tee/optee/Makefile b/drivers/tee/optee/Makefile
new file mode 100644
index 0000000..220cf42
--- /dev/null
+++ b/drivers/tee/optee/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_OPTEE) += optee.o
+optee-objs += core.o
+optee-objs += call.o
+optee-objs += rpc.o
+optee-objs += supp.o
+optee-objs += shm_pool.o
diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
new file mode 100644
index 0000000..a5afbe6
--- /dev/null
+++ b/drivers/tee/optee/call.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/arm-smccc.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct optee_call_waiter {
+ struct list_head list_node;
+ struct completion c;
+};
+
+static void optee_cq_wait_init(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ /*
+ * We're preparing to make a call to secure world. In case we can't
+ * allocate a thread in secure world we'll end up waiting in
+ * optee_cq_wait_for_completion().
+ *
+ * Normally if there's no contention in secure world the call will
+ * complete and we can cleanup directly with optee_cq_wait_final().
+ */
+ mutex_lock(&cq->mutex);
+
+ /*
+ * We add ourselves to the queue, but we don't wait. This
+ * guarantees that we don't lose a completion if secure world
+ * returns busy and another thread just exited and try to complete
+ * someone.
+ */
+ init_completion(&w->c);
+ list_add_tail(&w->list_node, &cq->waiters);
+
+ mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_wait_for_completion(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ wait_for_completion(&w->c);
+
+ mutex_lock(&cq->mutex);
+
+ /* Move to end of list to get out of the way for other waiters */
+ list_del(&w->list_node);
+ reinit_completion(&w->c);
+ list_add_tail(&w->list_node, &cq->waiters);
+
+ mutex_unlock(&cq->mutex);
+}
+
+static void optee_cq_complete_one(struct optee_call_queue *cq)
+{
+ struct optee_call_waiter *w;
+
+ list_for_each_entry(w, &cq->waiters, list_node) {
+ if (!completion_done(&w->c)) {
+ complete(&w->c);
+ break;
+ }
+ }
+}
+
+static void optee_cq_wait_final(struct optee_call_queue *cq,
+ struct optee_call_waiter *w)
+{
+ /*
+ * We're done with the call to secure world. The thread in secure
+ * world that was used for this call is now available for some
+ * other task to use.
+ */
+ mutex_lock(&cq->mutex);
+
+ /* Get out of the list */
+ list_del(&w->list_node);
+
+ /* Wake up one eventual waiting task */
+ optee_cq_complete_one(cq);
+
+ /*
+ * If we're completed we've got a completion from another task that
+ * was just done with its call to secure world. Since yet another
+ * thread now is available in secure world wake up another eventual
+ * waiting task.
+ */
+ if (completion_done(&w->c))
+ optee_cq_complete_one(cq);
+
+ mutex_unlock(&cq->mutex);
+}
+
+/* Requires the filpstate mutex to be held */
+static struct optee_session *find_session(struct optee_context_data *ctxdata,
+ u32 session_id)
+{
+ struct optee_session *sess;
+
+ list_for_each_entry(sess, &ctxdata->sess_list, list_node)
+ if (sess->session_id == session_id)
+ return sess;
+
+ return NULL;
+}
+
+/**
+ * optee_do_call_with_arg() - Do an SMC to OP-TEE in secure world
+ * @ctx: calling context
+ * @parg: physical address of message to pass to secure world
+ *
+ * Does and SMC to OP-TEE in secure world and handles eventual resulting
+ * Remote Procedure Calls (RPC) from OP-TEE.
+ *
+ * Returns return code from secure world, 0 is OK
+ */
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg)
+{
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct optee_call_waiter w;
+ struct optee_rpc_param param = { };
+ struct optee_call_ctx call_ctx = { };
+ u32 ret;
+
+ param.a0 = OPTEE_SMC_CALL_WITH_ARG;
+ reg_pair_from_64(¶m.a1, ¶m.a2, parg);
+ /* Initialize waiter */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ struct arm_smccc_res res;
+
+ optee->invoke_fn(param.a0, param.a1, param.a2, param.a3,
+ param.a4, param.a5, param.a6, param.a7,
+ &res);
+
+ if (res.a0 == OPTEE_SMC_RETURN_ETHREAD_LIMIT) {
+ /*
+ * Out of threads in secure world, wait for a thread
+ * become available.
+ */
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) {
+ param.a0 = res.a0;
+ param.a1 = res.a1;
+ param.a2 = res.a2;
+ param.a3 = res.a3;
+ optee_handle_rpc(ctx, ¶m, &call_ctx);
+ } else {
+ ret = res.a0;
+ break;
+ }
+ }
+
+ optee_rpc_finalize_call(&call_ctx);
+ /*
+ * We're done with our thread in secure world, if there's any
+ * thread waiters wake up one.
+ */
+ optee_cq_wait_final(&optee->call_queue, &w);
+
+ return ret;
+}
+
+static struct tee_shm *get_msg_arg(struct tee_context *ctx, size_t num_params,
+ struct optee_msg_arg **msg_arg,
+ phys_addr_t *msg_parg)
+{
+ int rc;
+ struct tee_shm *shm;
+ struct optee_msg_arg *ma;
+
+ shm = tee_shm_alloc(ctx, OPTEE_MSG_GET_ARG_SIZE(num_params),
+ TEE_SHM_MAPPED);
+ if (IS_ERR(shm))
+ return shm;
+
+ ma = tee_shm_get_va(shm, 0);
+ if (IS_ERR(ma)) {
+ rc = PTR_ERR(ma);
+ goto out;
+ }
+
+ rc = tee_shm_get_pa(shm, 0, msg_parg);
+ if (rc)
+ goto out;
+
+ memset(ma, 0, OPTEE_MSG_GET_ARG_SIZE(num_params));
+ ma->num_params = num_params;
+ *msg_arg = ma;
+out:
+ if (rc) {
+ tee_shm_free(shm);
+ return ERR_PTR(rc);
+ }
+
+ return shm;
+}
+
+int optee_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ int rc;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess = NULL;
+
+ /* +2 for the meta parameters added below */
+ shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_OPEN_SESSION;
+ msg_arg->cancel_id = arg->cancel_id;
+
+ /*
+ * Initialize and add the meta parameters needed when opening a
+ * session.
+ */
+ msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+ OPTEE_MSG_ATTR_META;
+ msg_arg->params[1].attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT |
+ OPTEE_MSG_ATTR_META;
+ memcpy(&msg_arg->params[0].u.value, arg->uuid, sizeof(arg->uuid));
+ memcpy(&msg_arg->params[1].u.value, arg->uuid, sizeof(arg->clnt_uuid));
+ msg_arg->params[1].u.value.c = arg->clnt_login;
+
+ rc = optee_to_msg_param(msg_arg->params + 2, arg->num_params, param);
+ if (rc)
+ goto out;
+
+ sess = kzalloc(sizeof(*sess), GFP_KERNEL);
+ if (!sess) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (optee_do_call_with_arg(ctx, msg_parg)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ if (msg_arg->ret == TEEC_SUCCESS) {
+ /* A new session has been created, add it to the list. */
+ sess->session_id = msg_arg->session;
+ mutex_lock(&ctxdata->mutex);
+ list_add(&sess->list_node, &ctxdata->sess_list);
+ mutex_unlock(&ctxdata->mutex);
+ } else {
+ kfree(sess);
+ }
+
+ if (optee_from_msg_param(param, arg->num_params, msg_arg->params + 2)) {
+ arg->ret = TEEC_ERROR_COMMUNICATION;
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+ /* Close session again to avoid leakage */
+ optee_close_session(ctx, msg_arg->session);
+ } else {
+ arg->session = msg_arg->session;
+ arg->ret = msg_arg->ret;
+ arg->ret_origin = msg_arg->ret_origin;
+ }
+out:
+ tee_shm_free(shm);
+
+ return rc;
+}
+
+int optee_close_session(struct tee_context *ctx, u32 session)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess;
+
+ /* Check that the session is valid and remove it from the list */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, session);
+ if (sess)
+ list_del(&sess->list_node);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+ kfree(sess);
+
+ shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+ msg_arg->session = session;
+ optee_do_call_with_arg(ctx, msg_parg);
+
+ tee_shm_free(shm);
+ return 0;
+}
+
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess;
+ int rc;
+
+ /* Check that the session is valid */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, arg->session);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+
+ shm = get_msg_arg(ctx, arg->num_params, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+ msg_arg->cmd = OPTEE_MSG_CMD_INVOKE_COMMAND;
+ msg_arg->func = arg->func;
+ msg_arg->session = arg->session;
+ msg_arg->cancel_id = arg->cancel_id;
+
+ rc = optee_to_msg_param(msg_arg->params, arg->num_params, param);
+ if (rc)
+ goto out;
+
+ if (optee_do_call_with_arg(ctx, msg_parg)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ if (optee_from_msg_param(param, arg->num_params, msg_arg->params)) {
+ msg_arg->ret = TEEC_ERROR_COMMUNICATION;
+ msg_arg->ret_origin = TEEC_ORIGIN_COMMS;
+ }
+
+ arg->ret = msg_arg->ret;
+ arg->ret_origin = msg_arg->ret_origin;
+out:
+ tee_shm_free(shm);
+ return rc;
+}
+
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_shm *shm;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ struct optee_session *sess;
+
+ /* Check that the session is valid */
+ mutex_lock(&ctxdata->mutex);
+ sess = find_session(ctxdata, session);
+ mutex_unlock(&ctxdata->mutex);
+ if (!sess)
+ return -EINVAL;
+
+ shm = get_msg_arg(ctx, 0, &msg_arg, &msg_parg);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_CANCEL;
+ msg_arg->session = session;
+ msg_arg->cancel_id = cancel_id;
+ optee_do_call_with_arg(ctx, msg_parg);
+
+ tee_shm_free(shm);
+ return 0;
+}
+
+/**
+ * optee_enable_shm_cache() - Enables caching of some shared memory allocation
+ * in OP-TEE
+ * @optee: main service struct
+ */
+void optee_enable_shm_cache(struct optee *optee)
+{
+ struct optee_call_waiter w;
+
+ /* We need to retry until secure world isn't busy. */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ struct arm_smccc_res res;
+
+ optee->invoke_fn(OPTEE_SMC_ENABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+ 0, &res);
+ if (res.a0 == OPTEE_SMC_RETURN_OK)
+ break;
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ }
+ optee_cq_wait_final(&optee->call_queue, &w);
+}
+
+/**
+ * optee_disable_shm_cache() - Disables caching of some shared memory allocation
+ * in OP-TEE
+ * @optee: main service struct
+ */
+void optee_disable_shm_cache(struct optee *optee)
+{
+ struct optee_call_waiter w;
+
+ /* We need to retry until secure world isn't busy. */
+ optee_cq_wait_init(&optee->call_queue, &w);
+ while (true) {
+ union {
+ struct arm_smccc_res smccc;
+ struct optee_smc_disable_shm_cache_result result;
+ } res;
+
+ optee->invoke_fn(OPTEE_SMC_DISABLE_SHM_CACHE, 0, 0, 0, 0, 0, 0,
+ 0, &res.smccc);
+ if (res.result.status == OPTEE_SMC_RETURN_ENOTAVAIL)
+ break; /* All shm's freed */
+ if (res.result.status == OPTEE_SMC_RETURN_OK) {
+ struct tee_shm *shm;
+
+ shm = reg_pair_to_ptr(res.result.shm_upper32,
+ res.result.shm_lower32);
+ tee_shm_free(shm);
+ } else {
+ optee_cq_wait_for_completion(&optee->call_queue, &w);
+ }
+ }
+ optee_cq_wait_final(&optee->call_queue, &w);
+}
+
+#define PAGELIST_ENTRIES_PER_PAGE \
+ ((OPTEE_MSG_NONCONTIG_PAGE_SIZE / sizeof(u64)) - 1)
+
+/**
+ * optee_fill_pages_list() - write list of user pages to given shared
+ * buffer.
+ *
+ * @dst: page-aligned buffer where list of pages will be stored
+ * @pages: array of pages that represents shared buffer
+ * @num_pages: number of entries in @pages
+ * @page_offset: offset of user buffer from page start
+ *
+ * @dst should be big enough to hold list of user page addresses and
+ * links to the next pages of buffer
+ */
+void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages,
+ size_t page_offset)
+{
+ int n = 0;
+ phys_addr_t optee_page;
+ /*
+ * Refer to OPTEE_MSG_ATTR_NONCONTIG description in optee_msg.h
+ * for details.
+ */
+ struct {
+ u64 pages_list[PAGELIST_ENTRIES_PER_PAGE];
+ u64 next_page_data;
+ } *pages_data;
+
+ /*
+ * Currently OP-TEE uses 4k page size and it does not looks
+ * like this will change in the future. On other hand, there are
+ * no know ARM architectures with page size < 4k.
+ * Thus the next built assert looks redundant. But the following
+ * code heavily relies on this assumption, so it is better be
+ * safe than sorry.
+ */
+ BUILD_BUG_ON(PAGE_SIZE < OPTEE_MSG_NONCONTIG_PAGE_SIZE);
+
+ pages_data = (void *)dst;
+ /*
+ * If linux page is bigger than 4k, and user buffer offset is
+ * larger than 4k/8k/12k/etc this will skip first 4k pages,
+ * because they bear no value data for OP-TEE.
+ */
+ optee_page = page_to_phys(*pages) +
+ round_down(page_offset, OPTEE_MSG_NONCONTIG_PAGE_SIZE);
+
+ while (true) {
+ pages_data->pages_list[n++] = optee_page;
+
+ if (n == PAGELIST_ENTRIES_PER_PAGE) {
+ pages_data->next_page_data =
+ virt_to_phys(pages_data + 1);
+ pages_data++;
+ n = 0;
+ }
+
+ optee_page += OPTEE_MSG_NONCONTIG_PAGE_SIZE;
+ if (!(optee_page & ~PAGE_MASK)) {
+ if (!--num_pages)
+ break;
+ pages++;
+ optee_page = page_to_phys(*pages);
+ }
+ }
+}
+
+/*
+ * The final entry in each pagelist page is a pointer to the next
+ * pagelist page.
+ */
+static size_t get_pages_list_size(size_t num_entries)
+{
+ int pages = DIV_ROUND_UP(num_entries, PAGELIST_ENTRIES_PER_PAGE);
+
+ return pages * OPTEE_MSG_NONCONTIG_PAGE_SIZE;
+}
+
+u64 *optee_allocate_pages_list(size_t num_entries)
+{
+ return alloc_pages_exact(get_pages_list_size(num_entries), GFP_KERNEL);
+}
+
+void optee_free_pages_list(void *list, size_t num_entries)
+{
+ free_pages_exact(list, get_pages_list_size(num_entries));
+}
+
+static bool is_normal_memory(pgprot_t p)
+{
+#if defined(CONFIG_ARM)
+ return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC;
+#elif defined(CONFIG_ARM64)
+ return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
+#else
+#error "Unuspported architecture"
+#endif
+}
+
+static int __check_mem_type(struct vm_area_struct *vma, unsigned long end)
+{
+ while (vma && is_normal_memory(vma->vm_page_prot)) {
+ if (vma->vm_end >= end)
+ return 0;
+ vma = vma->vm_next;
+ }
+
+ return -EINVAL;
+}
+
+static int check_mem_type(unsigned long start, size_t num_pages)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ down_read(&mm->mmap_sem);
+ rc = __check_mem_type(find_vma(mm, start),
+ start + num_pages * PAGE_SIZE);
+ up_read(&mm->mmap_sem);
+
+ return rc;
+}
+
+int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm,
+ struct page **pages, size_t num_pages,
+ unsigned long start)
+{
+ struct tee_shm *shm_arg = NULL;
+ struct optee_msg_arg *msg_arg;
+ u64 *pages_list;
+ phys_addr_t msg_parg;
+ int rc;
+
+ if (!num_pages)
+ return -EINVAL;
+
+ rc = check_mem_type(start, num_pages);
+ if (rc)
+ return rc;
+
+ pages_list = optee_allocate_pages_list(num_pages);
+ if (!pages_list)
+ return -ENOMEM;
+
+ shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg);
+ if (IS_ERR(shm_arg)) {
+ rc = PTR_ERR(shm_arg);
+ goto out;
+ }
+
+ optee_fill_pages_list(pages_list, pages, num_pages,
+ tee_shm_get_page_offset(shm));
+
+ msg_arg->cmd = OPTEE_MSG_CMD_REGISTER_SHM;
+ msg_arg->params->attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT |
+ OPTEE_MSG_ATTR_NONCONTIG;
+ msg_arg->params->u.tmem.shm_ref = (unsigned long)shm;
+ msg_arg->params->u.tmem.size = tee_shm_get_size(shm);
+ /*
+ * In the least bits of msg_arg->params->u.tmem.buf_ptr we
+ * store buffer offset from 4k page, as described in OP-TEE ABI.
+ */
+ msg_arg->params->u.tmem.buf_ptr = virt_to_phys(pages_list) |
+ (tee_shm_get_page_offset(shm) & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1));
+
+ if (optee_do_call_with_arg(ctx, msg_parg) ||
+ msg_arg->ret != TEEC_SUCCESS)
+ rc = -EINVAL;
+
+ tee_shm_free(shm_arg);
+out:
+ optee_free_pages_list(pages_list, num_pages);
+ return rc;
+}
+
+int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm)
+{
+ struct tee_shm *shm_arg;
+ struct optee_msg_arg *msg_arg;
+ phys_addr_t msg_parg;
+ int rc = 0;
+
+ shm_arg = get_msg_arg(ctx, 1, &msg_arg, &msg_parg);
+ if (IS_ERR(shm_arg))
+ return PTR_ERR(shm_arg);
+
+ msg_arg->cmd = OPTEE_MSG_CMD_UNREGISTER_SHM;
+
+ msg_arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT;
+ msg_arg->params[0].u.rmem.shm_ref = (unsigned long)shm;
+
+ if (optee_do_call_with_arg(ctx, msg_parg) ||
+ msg_arg->ret != TEEC_SUCCESS)
+ rc = -EINVAL;
+ tee_shm_free(shm_arg);
+ return rc;
+}
+
+int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm,
+ struct page **pages, size_t num_pages,
+ unsigned long start)
+{
+ /*
+ * We don't want to register supplicant memory in OP-TEE.
+ * Instead information about it will be passed in RPC code.
+ */
+ return check_mem_type(start, num_pages);
+}
+
+int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm)
+{
+ return 0;
+}
diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
new file mode 100644
index 0000000..e9843c5
--- /dev/null
+++ b/drivers/tee/optee/core.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+#include "shm_pool.h"
+
+#define DRIVER_NAME "optee"
+
+#define OPTEE_SHM_NUM_PRIV_PAGES 1
+
+/**
+ * optee_from_msg_param() - convert from OPTEE_MSG parameters to
+ * struct tee_param
+ * @params: subsystem internal parameter representation
+ * @num_params: number of elements in the parameter arrays
+ * @msg_params: OPTEE_MSG parameters
+ * Returns 0 on success or <0 on failure
+ */
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+ const struct optee_msg_param *msg_params)
+{
+ int rc;
+ size_t n;
+ struct tee_shm *shm;
+ phys_addr_t pa;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_param *p = params + n;
+ const struct optee_msg_param *mp = msg_params + n;
+ u32 attr = mp->attr & OPTEE_MSG_ATTR_TYPE_MASK;
+
+ switch (attr) {
+ case OPTEE_MSG_ATTR_TYPE_NONE:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
+ memset(&p->u, 0, sizeof(p->u));
+ break;
+ case OPTEE_MSG_ATTR_TYPE_VALUE_INPUT:
+ case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
+ case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT +
+ attr - OPTEE_MSG_ATTR_TYPE_VALUE_INPUT;
+ p->u.value.a = mp->u.value.a;
+ p->u.value.b = mp->u.value.b;
+ p->u.value.c = mp->u.value.c;
+ break;
+ case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
+ case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
+ case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT +
+ attr - OPTEE_MSG_ATTR_TYPE_TMEM_INPUT;
+ p->u.memref.size = mp->u.tmem.size;
+ shm = (struct tee_shm *)(unsigned long)
+ mp->u.tmem.shm_ref;
+ if (!shm) {
+ p->u.memref.shm_offs = 0;
+ p->u.memref.shm = NULL;
+ break;
+ }
+ rc = tee_shm_get_pa(shm, 0, &pa);
+ if (rc)
+ return rc;
+ p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa;
+ p->u.memref.shm = shm;
+
+ /* Check that the memref is covered by the shm object */
+ if (p->u.memref.size) {
+ size_t o = p->u.memref.shm_offs +
+ p->u.memref.size - 1;
+
+ rc = tee_shm_get_pa(shm, o, NULL);
+ if (rc)
+ return rc;
+ }
+ break;
+ case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT:
+ case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT:
+ case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT:
+ p->attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT +
+ attr - OPTEE_MSG_ATTR_TYPE_RMEM_INPUT;
+ p->u.memref.size = mp->u.rmem.size;
+ shm = (struct tee_shm *)(unsigned long)
+ mp->u.rmem.shm_ref;
+
+ if (!shm) {
+ p->u.memref.shm_offs = 0;
+ p->u.memref.shm = NULL;
+ break;
+ }
+ p->u.memref.shm_offs = mp->u.rmem.offs;
+ p->u.memref.shm = shm;
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int to_msg_param_tmp_mem(struct optee_msg_param *mp,
+ const struct tee_param *p)
+{
+ int rc;
+ phys_addr_t pa;
+
+ mp->attr = OPTEE_MSG_ATTR_TYPE_TMEM_INPUT + p->attr -
+ TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+
+ mp->u.tmem.shm_ref = (unsigned long)p->u.memref.shm;
+ mp->u.tmem.size = p->u.memref.size;
+
+ if (!p->u.memref.shm) {
+ mp->u.tmem.buf_ptr = 0;
+ return 0;
+ }
+
+ rc = tee_shm_get_pa(p->u.memref.shm, p->u.memref.shm_offs, &pa);
+ if (rc)
+ return rc;
+
+ mp->u.tmem.buf_ptr = pa;
+ mp->attr |= OPTEE_MSG_ATTR_CACHE_PREDEFINED <<
+ OPTEE_MSG_ATTR_CACHE_SHIFT;
+
+ return 0;
+}
+
+static int to_msg_param_reg_mem(struct optee_msg_param *mp,
+ const struct tee_param *p)
+{
+ mp->attr = OPTEE_MSG_ATTR_TYPE_RMEM_INPUT + p->attr -
+ TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+
+ mp->u.rmem.shm_ref = (unsigned long)p->u.memref.shm;
+ mp->u.rmem.size = p->u.memref.size;
+ mp->u.rmem.offs = p->u.memref.shm_offs;
+ return 0;
+}
+
+/**
+ * optee_to_msg_param() - convert from struct tee_params to OPTEE_MSG parameters
+ * @msg_params: OPTEE_MSG parameters
+ * @num_params: number of elements in the parameter arrays
+ * @params: subsystem itnernal parameter representation
+ * Returns 0 on success or <0 on failure
+ */
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+ const struct tee_param *params)
+{
+ int rc;
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ const struct tee_param *p = params + n;
+ struct optee_msg_param *mp = msg_params + n;
+
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+ mp->attr = TEE_IOCTL_PARAM_ATTR_TYPE_NONE;
+ memset(&mp->u, 0, sizeof(mp->u));
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ mp->attr = OPTEE_MSG_ATTR_TYPE_VALUE_INPUT + p->attr -
+ TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+ mp->u.value.a = p->u.value.a;
+ mp->u.value.b = p->u.value.b;
+ mp->u.value.c = p->u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ if (tee_shm_is_registered(p->u.memref.shm))
+ rc = to_msg_param_reg_mem(mp, p);
+ else
+ rc = to_msg_param_tmp_mem(mp, p);
+ if (rc)
+ return rc;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static void optee_get_version(struct tee_device *teedev,
+ struct tee_ioctl_version_data *vers)
+{
+ struct tee_ioctl_version_data v = {
+ .impl_id = TEE_IMPL_ID_OPTEE,
+ .impl_caps = TEE_OPTEE_CAP_TZ,
+ .gen_caps = TEE_GEN_CAP_GP,
+ };
+ struct optee *optee = tee_get_drvdata(teedev);
+
+ if (optee->sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
+ v.gen_caps |= TEE_GEN_CAP_REG_MEM;
+ *vers = v;
+}
+
+static int optee_open(struct tee_context *ctx)
+{
+ struct optee_context_data *ctxdata;
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+
+ ctxdata = kzalloc(sizeof(*ctxdata), GFP_KERNEL);
+ if (!ctxdata)
+ return -ENOMEM;
+
+ if (teedev == optee->supp_teedev) {
+ bool busy = true;
+
+ mutex_lock(&optee->supp.mutex);
+ if (!optee->supp.ctx) {
+ busy = false;
+ optee->supp.ctx = ctx;
+ }
+ mutex_unlock(&optee->supp.mutex);
+ if (busy) {
+ kfree(ctxdata);
+ return -EBUSY;
+ }
+ }
+
+ mutex_init(&ctxdata->mutex);
+ INIT_LIST_HEAD(&ctxdata->sess_list);
+
+ ctx->data = ctxdata;
+ return 0;
+}
+
+static void optee_release(struct tee_context *ctx)
+{
+ struct optee_context_data *ctxdata = ctx->data;
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct tee_shm *shm;
+ struct optee_msg_arg *arg = NULL;
+ phys_addr_t parg;
+ struct optee_session *sess;
+ struct optee_session *sess_tmp;
+
+ if (!ctxdata)
+ return;
+
+ shm = tee_shm_alloc(ctx, sizeof(struct optee_msg_arg), TEE_SHM_MAPPED);
+ if (!IS_ERR(shm)) {
+ arg = tee_shm_get_va(shm, 0);
+ /*
+ * If va2pa fails for some reason, we can't call into
+ * secure world, only free the memory. Secure OS will leak
+ * sessions and finally refuse more sessions, but we will
+ * at least let normal world reclaim its memory.
+ */
+ if (!IS_ERR(arg))
+ if (tee_shm_va2pa(shm, arg, &parg))
+ arg = NULL; /* prevent usage of parg below */
+ }
+
+ list_for_each_entry_safe(sess, sess_tmp, &ctxdata->sess_list,
+ list_node) {
+ list_del(&sess->list_node);
+ if (!IS_ERR_OR_NULL(arg)) {
+ memset(arg, 0, sizeof(*arg));
+ arg->cmd = OPTEE_MSG_CMD_CLOSE_SESSION;
+ arg->session = sess->session_id;
+ optee_do_call_with_arg(ctx, parg);
+ }
+ kfree(sess);
+ }
+ kfree(ctxdata);
+
+ if (!IS_ERR(shm))
+ tee_shm_free(shm);
+
+ ctx->data = NULL;
+
+ if (teedev == optee->supp_teedev)
+ optee_supp_release(&optee->supp);
+}
+
+static const struct tee_driver_ops optee_ops = {
+ .get_version = optee_get_version,
+ .open = optee_open,
+ .release = optee_release,
+ .open_session = optee_open_session,
+ .close_session = optee_close_session,
+ .invoke_func = optee_invoke_func,
+ .cancel_req = optee_cancel_req,
+ .shm_register = optee_shm_register,
+ .shm_unregister = optee_shm_unregister,
+};
+
+static const struct tee_desc optee_desc = {
+ .name = DRIVER_NAME "-clnt",
+ .ops = &optee_ops,
+ .owner = THIS_MODULE,
+};
+
+static const struct tee_driver_ops optee_supp_ops = {
+ .get_version = optee_get_version,
+ .open = optee_open,
+ .release = optee_release,
+ .supp_recv = optee_supp_recv,
+ .supp_send = optee_supp_send,
+ .shm_register = optee_shm_register_supp,
+ .shm_unregister = optee_shm_unregister_supp,
+};
+
+static const struct tee_desc optee_supp_desc = {
+ .name = DRIVER_NAME "-supp",
+ .ops = &optee_supp_ops,
+ .owner = THIS_MODULE,
+ .flags = TEE_DESC_PRIVILEGED,
+};
+
+static bool optee_msg_api_uid_is_optee_api(optee_invoke_fn *invoke_fn)
+{
+ struct arm_smccc_res res;
+
+ invoke_fn(OPTEE_SMC_CALLS_UID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ if (res.a0 == OPTEE_MSG_UID_0 && res.a1 == OPTEE_MSG_UID_1 &&
+ res.a2 == OPTEE_MSG_UID_2 && res.a3 == OPTEE_MSG_UID_3)
+ return true;
+ return false;
+}
+
+static bool optee_msg_api_revision_is_compatible(optee_invoke_fn *invoke_fn)
+{
+ union {
+ struct arm_smccc_res smccc;
+ struct optee_smc_calls_revision_result result;
+ } res;
+
+ invoke_fn(OPTEE_SMC_CALLS_REVISION, 0, 0, 0, 0, 0, 0, 0, &res.smccc);
+
+ if (res.result.major == OPTEE_MSG_REVISION_MAJOR &&
+ (int)res.result.minor >= OPTEE_MSG_REVISION_MINOR)
+ return true;
+ return false;
+}
+
+static bool optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn,
+ u32 *sec_caps)
+{
+ union {
+ struct arm_smccc_res smccc;
+ struct optee_smc_exchange_capabilities_result result;
+ } res;
+ u32 a1 = 0;
+
+ /*
+ * TODO This isn't enough to tell if it's UP system (from kernel
+ * point of view) or not, is_smp() returns the the information
+ * needed, but can't be called directly from here.
+ */
+ if (!IS_ENABLED(CONFIG_SMP) || nr_cpu_ids == 1)
+ a1 |= OPTEE_SMC_NSEC_CAP_UNIPROCESSOR;
+
+ invoke_fn(OPTEE_SMC_EXCHANGE_CAPABILITIES, a1, 0, 0, 0, 0, 0, 0,
+ &res.smccc);
+
+ if (res.result.status != OPTEE_SMC_RETURN_OK)
+ return false;
+
+ *sec_caps = res.result.capabilities;
+ return true;
+}
+
+static struct tee_shm_pool *
+optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm,
+ u32 sec_caps)
+{
+ union {
+ struct arm_smccc_res smccc;
+ struct optee_smc_get_shm_config_result result;
+ } res;
+ unsigned long vaddr;
+ phys_addr_t paddr;
+ size_t size;
+ phys_addr_t begin;
+ phys_addr_t end;
+ void *va;
+ struct tee_shm_pool_mgr *priv_mgr;
+ struct tee_shm_pool_mgr *dmabuf_mgr;
+ void *rc;
+
+ invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, &res.smccc);
+ if (res.result.status != OPTEE_SMC_RETURN_OK) {
+ pr_info("shm service not available\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ if (res.result.settings != OPTEE_SMC_SHM_CACHED) {
+ pr_err("only normal cached shared memory supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ begin = roundup(res.result.start, PAGE_SIZE);
+ end = rounddown(res.result.start + res.result.size, PAGE_SIZE);
+ paddr = begin;
+ size = end - begin;
+
+ if (size < 2 * OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE) {
+ pr_err("too small shared memory area\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ va = memremap(paddr, size, MEMREMAP_WB);
+ if (!va) {
+ pr_err("shared memory ioremap failed\n");
+ return ERR_PTR(-EINVAL);
+ }
+ vaddr = (unsigned long)va;
+
+ /*
+ * If OP-TEE can work with unregistered SHM, we will use own pool
+ * for private shm
+ */
+ if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) {
+ rc = optee_shm_pool_alloc_pages();
+ if (IS_ERR(rc))
+ goto err_memunmap;
+ priv_mgr = rc;
+ } else {
+ const size_t sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
+
+ rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz,
+ 3 /* 8 bytes aligned */);
+ if (IS_ERR(rc))
+ goto err_memunmap;
+ priv_mgr = rc;
+
+ vaddr += sz;
+ paddr += sz;
+ size -= sz;
+ }
+
+ rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, size, PAGE_SHIFT);
+ if (IS_ERR(rc))
+ goto err_free_priv_mgr;
+ dmabuf_mgr = rc;
+
+ rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);
+ if (IS_ERR(rc))
+ goto err_free_dmabuf_mgr;
+
+ *memremaped_shm = va;
+
+ return rc;
+
+err_free_dmabuf_mgr:
+ tee_shm_pool_mgr_destroy(dmabuf_mgr);
+err_free_priv_mgr:
+ tee_shm_pool_mgr_destroy(priv_mgr);
+err_memunmap:
+ memunmap(va);
+ return rc;
+}
+
+/* Simple wrapper functions to be able to use a function pointer */
+static void optee_smccc_smc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ unsigned long a4, unsigned long a5,
+ unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res)
+{
+ arm_smccc_smc(a0, a1, a2, a3, a4, a5, a6, a7, res);
+}
+
+static void optee_smccc_hvc(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ unsigned long a4, unsigned long a5,
+ unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *res)
+{
+ arm_smccc_hvc(a0, a1, a2, a3, a4, a5, a6, a7, res);
+}
+
+static optee_invoke_fn *get_invoke_func(struct device_node *np)
+{
+ const char *method;
+
+ pr_info("probing for conduit method from DT.\n");
+
+ if (of_property_read_string(np, "method", &method)) {
+ pr_warn("missing \"method\" property\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ if (!strcmp("hvc", method))
+ return optee_smccc_hvc;
+ else if (!strcmp("smc", method))
+ return optee_smccc_smc;
+
+ pr_warn("invalid \"method\" property: %s\n", method);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct optee *optee_probe(struct device_node *np)
+{
+ optee_invoke_fn *invoke_fn;
+ struct tee_shm_pool *pool;
+ struct optee *optee = NULL;
+ void *memremaped_shm = NULL;
+ struct tee_device *teedev;
+ u32 sec_caps;
+ int rc;
+
+ invoke_fn = get_invoke_func(np);
+ if (IS_ERR(invoke_fn))
+ return (void *)invoke_fn;
+
+ if (!optee_msg_api_uid_is_optee_api(invoke_fn)) {
+ pr_warn("api uid mismatch\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!optee_msg_api_revision_is_compatible(invoke_fn)) {
+ pr_warn("api revision mismatch\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!optee_msg_exchange_capabilities(invoke_fn, &sec_caps)) {
+ pr_warn("capabilities mismatch\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * We have no other option for shared memory, if secure world
+ * doesn't have any reserved memory we can use we can't continue.
+ */
+ if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
+ return ERR_PTR(-EINVAL);
+
+ pool = optee_config_shm_memremap(invoke_fn, &memremaped_shm, sec_caps);
+ if (IS_ERR(pool))
+ return (void *)pool;
+
+ optee = kzalloc(sizeof(*optee), GFP_KERNEL);
+ if (!optee) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ optee->invoke_fn = invoke_fn;
+ optee->sec_caps = sec_caps;
+
+ teedev = tee_device_alloc(&optee_desc, NULL, pool, optee);
+ if (IS_ERR(teedev)) {
+ rc = PTR_ERR(teedev);
+ goto err;
+ }
+ optee->teedev = teedev;
+
+ teedev = tee_device_alloc(&optee_supp_desc, NULL, pool, optee);
+ if (IS_ERR(teedev)) {
+ rc = PTR_ERR(teedev);
+ goto err;
+ }
+ optee->supp_teedev = teedev;
+
+ rc = tee_device_register(optee->teedev);
+ if (rc)
+ goto err;
+
+ rc = tee_device_register(optee->supp_teedev);
+ if (rc)
+ goto err;
+
+ mutex_init(&optee->call_queue.mutex);
+ INIT_LIST_HEAD(&optee->call_queue.waiters);
+ optee_wait_queue_init(&optee->wait_queue);
+ optee_supp_init(&optee->supp);
+ optee->memremaped_shm = memremaped_shm;
+ optee->pool = pool;
+
+ optee_enable_shm_cache(optee);
+
+ pr_info("initialized driver\n");
+ return optee;
+err:
+ if (optee) {
+ /*
+ * tee_device_unregister() is safe to call even if the
+ * devices hasn't been registered with
+ * tee_device_register() yet.
+ */
+ tee_device_unregister(optee->supp_teedev);
+ tee_device_unregister(optee->teedev);
+ kfree(optee);
+ }
+ if (pool)
+ tee_shm_pool_free(pool);
+ if (memremaped_shm)
+ memunmap(memremaped_shm);
+ return ERR_PTR(rc);
+}
+
+static void optee_remove(struct optee *optee)
+{
+ /*
+ * Ask OP-TEE to free all cached shared memory objects to decrease
+ * reference counters and also avoid wild pointers in secure world
+ * into the old shared memory range.
+ */
+ optee_disable_shm_cache(optee);
+
+ /*
+ * The two devices has to be unregistered before we can free the
+ * other resources.
+ */
+ tee_device_unregister(optee->supp_teedev);
+ tee_device_unregister(optee->teedev);
+
+ tee_shm_pool_free(optee->pool);
+ if (optee->memremaped_shm)
+ memunmap(optee->memremaped_shm);
+ optee_wait_queue_exit(&optee->wait_queue);
+ optee_supp_uninit(&optee->supp);
+ mutex_destroy(&optee->call_queue.mutex);
+
+ kfree(optee);
+}
+
+static const struct of_device_id optee_match[] = {
+ { .compatible = "linaro,optee-tz" },
+ {},
+};
+
+static struct optee *optee_svc;
+
+static int __init optee_driver_init(void)
+{
+ struct device_node *fw_np;
+ struct device_node *np;
+ struct optee *optee;
+
+ /* Node is supposed to be below /firmware */
+ fw_np = of_find_node_by_name(NULL, "firmware");
+ if (!fw_np)
+ return -ENODEV;
+
+ np = of_find_matching_node(fw_np, optee_match);
+ if (!np)
+ return -ENODEV;
+
+ optee = optee_probe(np);
+ of_node_put(np);
+
+ if (IS_ERR(optee))
+ return PTR_ERR(optee);
+
+ optee_svc = optee;
+
+ return 0;
+}
+module_init(optee_driver_init);
+
+static void __exit optee_driver_exit(void)
+{
+ struct optee *optee = optee_svc;
+
+ optee_svc = NULL;
+ if (optee)
+ optee_remove(optee);
+}
+module_exit(optee_driver_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("OP-TEE driver");
+MODULE_SUPPORTED_DEVICE("");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h
new file mode 100644
index 0000000..3050490
--- /dev/null
+++ b/drivers/tee/optee/optee_msg.h
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _OPTEE_MSG_H
+#define _OPTEE_MSG_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+/*
+ * This file defines the OP-TEE message protocol used to communicate
+ * with an instance of OP-TEE running in secure world.
+ *
+ * This file is divided into three sections.
+ * 1. Formatting of messages.
+ * 2. Requests from normal world
+ * 3. Requests from secure world, Remote Procedure Call (RPC), handled by
+ * tee-supplicant.
+ */
+
+/*****************************************************************************
+ * Part 1 - formatting of messages
+ *****************************************************************************/
+
+#define OPTEE_MSG_ATTR_TYPE_NONE 0x0
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INPUT 0x1
+#define OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT 0x2
+#define OPTEE_MSG_ATTR_TYPE_VALUE_INOUT 0x3
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INPUT 0x5
+#define OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT 0x6
+#define OPTEE_MSG_ATTR_TYPE_RMEM_INOUT 0x7
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INPUT 0x9
+#define OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT 0xa
+#define OPTEE_MSG_ATTR_TYPE_TMEM_INOUT 0xb
+
+#define OPTEE_MSG_ATTR_TYPE_MASK GENMASK(7, 0)
+
+/*
+ * Meta parameter to be absorbed by the Secure OS and not passed
+ * to the Trusted Application.
+ *
+ * Currently only used with OPTEE_MSG_CMD_OPEN_SESSION.
+ */
+#define OPTEE_MSG_ATTR_META BIT(8)
+
+/*
+ * Pointer to a list of pages used to register user-defined SHM buffer.
+ * Used with OPTEE_MSG_ATTR_TYPE_TMEM_*.
+ * buf_ptr should point to the beginning of the buffer. Buffer will contain
+ * list of page addresses. OP-TEE core can reconstruct contiguous buffer from
+ * that page addresses list. Page addresses are stored as 64 bit values.
+ * Last entry on a page should point to the next page of buffer.
+ * Every entry in buffer should point to a 4k page beginning (12 least
+ * significant bits must be equal to zero).
+ *
+ * 12 least significant bints of optee_msg_param.u.tmem.buf_ptr should hold page
+ * offset of the user buffer.
+ *
+ * So, entries should be placed like members of this structure:
+ *
+ * struct page_data {
+ * uint64_t pages_array[OPTEE_MSG_NONCONTIG_PAGE_SIZE/sizeof(uint64_t) - 1];
+ * uint64_t next_page_data;
+ * };
+ *
+ * Structure is designed to exactly fit into the page size
+ * OPTEE_MSG_NONCONTIG_PAGE_SIZE which is a standard 4KB page.
+ *
+ * The size of 4KB is chosen because this is the smallest page size for ARM
+ * architectures. If REE uses larger pages, it should divide them to 4KB ones.
+ */
+#define OPTEE_MSG_ATTR_NONCONTIG BIT(9)
+
+/*
+ * Memory attributes for caching passed with temp memrefs. The actual value
+ * used is defined outside the message protocol with the exception of
+ * OPTEE_MSG_ATTR_CACHE_PREDEFINED which means the attributes already
+ * defined for the memory range should be used. If optee_smc.h is used as
+ * bearer of this protocol OPTEE_SMC_SHM_* is used for values.
+ */
+#define OPTEE_MSG_ATTR_CACHE_SHIFT 16
+#define OPTEE_MSG_ATTR_CACHE_MASK GENMASK(2, 0)
+#define OPTEE_MSG_ATTR_CACHE_PREDEFINED 0
+
+/*
+ * Same values as TEE_LOGIN_* from TEE Internal API
+ */
+#define OPTEE_MSG_LOGIN_PUBLIC 0x00000000
+#define OPTEE_MSG_LOGIN_USER 0x00000001
+#define OPTEE_MSG_LOGIN_GROUP 0x00000002
+#define OPTEE_MSG_LOGIN_APPLICATION 0x00000004
+#define OPTEE_MSG_LOGIN_APPLICATION_USER 0x00000005
+#define OPTEE_MSG_LOGIN_APPLICATION_GROUP 0x00000006
+
+/*
+ * Page size used in non-contiguous buffer entries
+ */
+#define OPTEE_MSG_NONCONTIG_PAGE_SIZE 4096
+
+/**
+ * struct optee_msg_param_tmem - temporary memory reference parameter
+ * @buf_ptr: Address of the buffer
+ * @size: Size of the buffer
+ * @shm_ref: Temporary shared memory reference, pointer to a struct tee_shm
+ *
+ * Secure and normal world communicates pointers as physical address
+ * instead of the virtual address. This is because secure and normal world
+ * have completely independent memory mapping. Normal world can even have a
+ * hypervisor which need to translate the guest physical address (AKA IPA
+ * in ARM documentation) to a real physical address before passing the
+ * structure to secure world.
+ */
+struct optee_msg_param_tmem {
+ u64 buf_ptr;
+ u64 size;
+ u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_rmem - registered memory reference parameter
+ * @offs: Offset into shared memory reference
+ * @size: Size of the buffer
+ * @shm_ref: Shared memory reference, pointer to a struct tee_shm
+ */
+struct optee_msg_param_rmem {
+ u64 offs;
+ u64 size;
+ u64 shm_ref;
+};
+
+/**
+ * struct optee_msg_param_value - opaque value parameter
+ *
+ * Value parameters are passed unchecked between normal and secure world.
+ */
+struct optee_msg_param_value {
+ u64 a;
+ u64 b;
+ u64 c;
+};
+
+/**
+ * struct optee_msg_param - parameter used together with struct optee_msg_arg
+ * @attr: attributes
+ * @tmem: parameter by temporary memory reference
+ * @rmem: parameter by registered memory reference
+ * @value: parameter by opaque value
+ *
+ * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in
+ * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value,
+ * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates @tmem and
+ * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates @rmem,
+ * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used.
+ */
+struct optee_msg_param {
+ u64 attr;
+ union {
+ struct optee_msg_param_tmem tmem;
+ struct optee_msg_param_rmem rmem;
+ struct optee_msg_param_value value;
+ } u;
+};
+
+/**
+ * struct optee_msg_arg - call argument
+ * @cmd: Command, one of OPTEE_MSG_CMD_* or OPTEE_MSG_RPC_CMD_*
+ * @func: Trusted Application function, specific to the Trusted Application,
+ * used if cmd == OPTEE_MSG_CMD_INVOKE_COMMAND
+ * @session: In parameter for all OPTEE_MSG_CMD_* except
+ * OPTEE_MSG_CMD_OPEN_SESSION where it's an output parameter instead
+ * @cancel_id: Cancellation id, a unique value to identify this request
+ * @ret: return value
+ * @ret_origin: origin of the return value
+ * @num_params: number of parameters supplied to the OS Command
+ * @params: the parameters supplied to the OS Command
+ *
+ * All normal calls to Trusted OS uses this struct. If cmd requires further
+ * information than what these field holds it can be passed as a parameter
+ * tagged as meta (setting the OPTEE_MSG_ATTR_META bit in corresponding
+ * attrs field). All parameters tagged as meta has to come first.
+ *
+ * Temp memref parameters can be fragmented if supported by the Trusted OS
+ * (when optee_smc.h is bearer of this protocol this is indicated with
+ * OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM). If a logical memref parameter is
+ * fragmented then has all but the last fragment the
+ * OPTEE_MSG_ATTR_FRAGMENT bit set in attrs. Even if a memref is fragmented
+ * it will still be presented as a single logical memref to the Trusted
+ * Application.
+ */
+struct optee_msg_arg {
+ u32 cmd;
+ u32 func;
+ u32 session;
+ u32 cancel_id;
+ u32 pad;
+ u32 ret;
+ u32 ret_origin;
+ u32 num_params;
+
+ /* num_params tells the actual number of element in params */
+ struct optee_msg_param params[0];
+};
+
+/**
+ * OPTEE_MSG_GET_ARG_SIZE - return size of struct optee_msg_arg
+ *
+ * @num_params: Number of parameters embedded in the struct optee_msg_arg
+ *
+ * Returns the size of the struct optee_msg_arg together with the number
+ * of embedded parameters.
+ */
+#define OPTEE_MSG_GET_ARG_SIZE(num_params) \
+ (sizeof(struct optee_msg_arg) + \
+ sizeof(struct optee_msg_param) * (num_params))
+
+/*****************************************************************************
+ * Part 2 - requests from normal world
+ *****************************************************************************/
+
+/*
+ * Return the following UID if using API specified in this file without
+ * further extensions:
+ * 384fb3e0-e7f8-11e3-af63-0002a5d5c51b.
+ * Represented in 4 32-bit words in OPTEE_MSG_UID_0, OPTEE_MSG_UID_1,
+ * OPTEE_MSG_UID_2, OPTEE_MSG_UID_3.
+ */
+#define OPTEE_MSG_UID_0 0x384fb3e0
+#define OPTEE_MSG_UID_1 0xe7f811e3
+#define OPTEE_MSG_UID_2 0xaf630002
+#define OPTEE_MSG_UID_3 0xa5d5c51b
+#define OPTEE_MSG_FUNCID_CALLS_UID 0xFF01
+
+/*
+ * Returns 2.0 if using API specified in this file without further
+ * extensions. Represented in 2 32-bit words in OPTEE_MSG_REVISION_MAJOR
+ * and OPTEE_MSG_REVISION_MINOR
+ */
+#define OPTEE_MSG_REVISION_MAJOR 2
+#define OPTEE_MSG_REVISION_MINOR 0
+#define OPTEE_MSG_FUNCID_CALLS_REVISION 0xFF03
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in 4 32-bit words in the same way as
+ * OPTEE_MSG_FUNCID_CALLS_UID described above.
+ */
+#define OPTEE_MSG_OS_OPTEE_UUID_0 0x486178e0
+#define OPTEE_MSG_OS_OPTEE_UUID_1 0xe7f811e3
+#define OPTEE_MSG_OS_OPTEE_UUID_2 0xbc5e0002
+#define OPTEE_MSG_OS_OPTEE_UUID_3 0xa5d5c51b
+#define OPTEE_MSG_FUNCID_GET_OS_UUID 0x0000
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in 2 32-bit words in the same way as
+ * OPTEE_MSG_CALLS_REVISION described above.
+ */
+#define OPTEE_MSG_FUNCID_GET_OS_REVISION 0x0001
+
+/*
+ * Do a secure call with struct optee_msg_arg as argument
+ * The OPTEE_MSG_CMD_* below defines what goes in struct optee_msg_arg::cmd
+ *
+ * OPTEE_MSG_CMD_OPEN_SESSION opens a session to a Trusted Application.
+ * The first two parameters are tagged as meta, holding two value
+ * parameters to pass the following information:
+ * param[0].u.value.a-b uuid of Trusted Application
+ * param[1].u.value.a-b uuid of Client
+ * param[1].u.value.c Login class of client OPTEE_MSG_LOGIN_*
+ *
+ * OPTEE_MSG_CMD_INVOKE_COMMAND invokes a command a previously opened
+ * session to a Trusted Application. struct optee_msg_arg::func is Trusted
+ * Application function, specific to the Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CLOSE_SESSION closes a previously opened session to
+ * Trusted Application.
+ *
+ * OPTEE_MSG_CMD_CANCEL cancels a currently invoked command.
+ *
+ * OPTEE_MSG_CMD_REGISTER_SHM registers a shared memory reference. The
+ * information is passed as:
+ * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_TMEM_INPUT
+ * [| OPTEE_MSG_ATTR_FRAGMENT]
+ * [in] param[0].u.tmem.buf_ptr physical address (of first fragment)
+ * [in] param[0].u.tmem.size size (of first fragment)
+ * [in] param[0].u.tmem.shm_ref holds shared memory reference
+ * ...
+ * The shared memory can optionally be fragmented, temp memrefs can follow
+ * each other with all but the last with the OPTEE_MSG_ATTR_FRAGMENT bit set.
+ *
+ * OPTEE_MSG_CMD_UNREGISTER_SHM unregisteres a previously registered shared
+ * memory reference. The information is passed as:
+ * [in] param[0].attr OPTEE_MSG_ATTR_TYPE_RMEM_INPUT
+ * [in] param[0].u.rmem.shm_ref holds shared memory reference
+ * [in] param[0].u.rmem.offs 0
+ * [in] param[0].u.rmem.size 0
+ */
+#define OPTEE_MSG_CMD_OPEN_SESSION 0
+#define OPTEE_MSG_CMD_INVOKE_COMMAND 1
+#define OPTEE_MSG_CMD_CLOSE_SESSION 2
+#define OPTEE_MSG_CMD_CANCEL 3
+#define OPTEE_MSG_CMD_REGISTER_SHM 4
+#define OPTEE_MSG_CMD_UNREGISTER_SHM 5
+#define OPTEE_MSG_FUNCID_CALL_WITH_ARG 0x0004
+
+/*****************************************************************************
+ * Part 3 - Requests from secure world, RPC
+ *****************************************************************************/
+
+/*
+ * All RPC is done with a struct optee_msg_arg as bearer of information,
+ * struct optee_msg_arg::arg holds values defined by OPTEE_MSG_RPC_CMD_* below
+ *
+ * RPC communication with tee-supplicant is reversed compared to normal
+ * client communication desribed above. The supplicant receives requests
+ * and sends responses.
+ */
+
+/*
+ * Load a TA into memory, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_LOAD_TA 0
+
+/*
+ * Reserved
+ */
+#define OPTEE_MSG_RPC_CMD_RPMB 1
+
+/*
+ * File system access, defined in tee-supplicant
+ */
+#define OPTEE_MSG_RPC_CMD_FS 2
+
+/*
+ * Get time
+ *
+ * Returns number of seconds and nano seconds since the Epoch,
+ * 1970-01-01 00:00:00 +0000 (UTC).
+ *
+ * [out] param[0].u.value.a Number of seconds
+ * [out] param[0].u.value.b Number of nano seconds.
+ */
+#define OPTEE_MSG_RPC_CMD_GET_TIME 3
+
+/*
+ * Wait queue primitive, helper for secure world to implement a wait queue.
+ *
+ * If secure world need to wait for a secure world mutex it issues a sleep
+ * request instead of spinning in secure world. Conversely is a wakeup
+ * request issued when a secure world mutex with a thread waiting thread is
+ * unlocked.
+ *
+ * Waiting on a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP
+ * [in] param[0].u.value.b wait key
+ *
+ * Waking up a key
+ * [in] param[0].u.value.a OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP
+ * [in] param[0].u.value.b wakeup key
+ */
+#define OPTEE_MSG_RPC_CMD_WAIT_QUEUE 4
+#define OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP 0
+#define OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP 1
+
+/*
+ * Suspend execution
+ *
+ * [in] param[0].value .a number of milliseconds to suspend
+ */
+#define OPTEE_MSG_RPC_CMD_SUSPEND 5
+
+/*
+ * Allocate a piece of shared memory
+ *
+ * Shared memory can optionally be fragmented, to support that additional
+ * spare param entries are allocated to make room for eventual fragments.
+ * The spare param entries has .attr = OPTEE_MSG_ATTR_TYPE_NONE when
+ * unused. All returned temp memrefs except the last should have the
+ * OPTEE_MSG_ATTR_FRAGMENT bit set in the attr field.
+ *
+ * [in] param[0].u.value.a type of memory one of
+ * OPTEE_MSG_RPC_SHM_TYPE_* below
+ * [in] param[0].u.value.b requested size
+ * [in] param[0].u.value.c required alignment
+ *
+ * [out] param[0].u.tmem.buf_ptr physical address (of first fragment)
+ * [out] param[0].u.tmem.size size (of first fragment)
+ * [out] param[0].u.tmem.shm_ref shared memory reference
+ * ...
+ * [out] param[n].u.tmem.buf_ptr physical address
+ * [out] param[n].u.tmem.size size
+ * [out] param[n].u.tmem.shm_ref shared memory reference (same value
+ * as in param[n-1].u.tmem.shm_ref)
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_ALLOC 6
+/* Memory that can be shared with a non-secure user space application */
+#define OPTEE_MSG_RPC_SHM_TYPE_APPL 0
+/* Memory only shared with non-secure kernel */
+#define OPTEE_MSG_RPC_SHM_TYPE_KERNEL 1
+
+/*
+ * Free shared memory previously allocated with OPTEE_MSG_RPC_CMD_SHM_ALLOC
+ *
+ * [in] param[0].u.value.a type of memory one of
+ * OPTEE_MSG_RPC_SHM_TYPE_* above
+ * [in] param[0].u.value.b value of shared memory reference
+ * returned in param[0].u.tmem.shm_ref
+ * above
+ */
+#define OPTEE_MSG_RPC_CMD_SHM_FREE 7
+
+#endif /* _OPTEE_MSG_H */
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
new file mode 100644
index 0000000..35e7938
--- /dev/null
+++ b/drivers/tee/optee/optee_private.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef OPTEE_PRIVATE_H
+#define OPTEE_PRIVATE_H
+
+#include <linux/arm-smccc.h>
+#include <linux/semaphore.h>
+#include <linux/tee_drv.h>
+#include <linux/types.h>
+#include "optee_msg.h"
+
+#define OPTEE_MAX_ARG_SIZE 1024
+
+/* Some Global Platform error codes used in this driver */
+#define TEEC_SUCCESS 0x00000000
+#define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006
+#define TEEC_ERROR_COMMUNICATION 0xFFFF000E
+#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C
+
+#define TEEC_ORIGIN_COMMS 0x00000002
+
+typedef void (optee_invoke_fn)(unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ struct arm_smccc_res *);
+
+struct optee_call_queue {
+ /* Serializes access to this struct */
+ struct mutex mutex;
+ struct list_head waiters;
+};
+
+struct optee_wait_queue {
+ /* Serializes access to this struct */
+ struct mutex mu;
+ struct list_head db;
+};
+
+/**
+ * struct optee_supp - supplicant synchronization struct
+ * @ctx the context of current connected supplicant.
+ * if !NULL the supplicant device is available for use,
+ * else busy
+ * @mutex: held while accessing content of this struct
+ * @req_id: current request id if supplicant is doing synchronous
+ * communication, else -1
+ * @reqs: queued request not yet retrieved by supplicant
+ * @idr: IDR holding all requests currently being processed
+ * by supplicant
+ * @reqs_c: completion used by supplicant when waiting for a
+ * request to be queued.
+ */
+struct optee_supp {
+ /* Serializes access to this struct */
+ struct mutex mutex;
+ struct tee_context *ctx;
+
+ int req_id;
+ struct list_head reqs;
+ struct idr idr;
+ struct completion reqs_c;
+};
+
+/**
+ * struct optee - main service struct
+ * @supp_teedev: supplicant device
+ * @teedev: client device
+ * @invoke_fn: function to issue smc or hvc
+ * @call_queue: queue of threads waiting to call @invoke_fn
+ * @wait_queue: queue of threads from secure world waiting for a
+ * secure world sync object
+ * @supp: supplicant synchronization struct for RPC to supplicant
+ * @pool: shared memory pool
+ * @memremaped_shm virtual address of memory in shared memory pool
+ * @sec_caps: secure world capabilities defined by
+ * OPTEE_SMC_SEC_CAP_* in optee_smc.h
+ */
+struct optee {
+ struct tee_device *supp_teedev;
+ struct tee_device *teedev;
+ optee_invoke_fn *invoke_fn;
+ struct optee_call_queue call_queue;
+ struct optee_wait_queue wait_queue;
+ struct optee_supp supp;
+ struct tee_shm_pool *pool;
+ void *memremaped_shm;
+ u32 sec_caps;
+};
+
+struct optee_session {
+ struct list_head list_node;
+ u32 session_id;
+};
+
+struct optee_context_data {
+ /* Serializes access to this struct */
+ struct mutex mutex;
+ struct list_head sess_list;
+};
+
+struct optee_rpc_param {
+ u32 a0;
+ u32 a1;
+ u32 a2;
+ u32 a3;
+ u32 a4;
+ u32 a5;
+ u32 a6;
+ u32 a7;
+};
+
+/* Holds context that is preserved during one STD call */
+struct optee_call_ctx {
+ /* information about pages list used in last allocation */
+ void *pages_list;
+ size_t num_entries;
+};
+
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param,
+ struct optee_call_ctx *call_ctx);
+void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx);
+
+void optee_wait_queue_init(struct optee_wait_queue *wq);
+void optee_wait_queue_exit(struct optee_wait_queue *wq);
+
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+ struct tee_param *param);
+
+int optee_supp_read(struct tee_context *ctx, void __user *buf, size_t len);
+int optee_supp_write(struct tee_context *ctx, void __user *buf, size_t len);
+void optee_supp_init(struct optee_supp *supp);
+void optee_supp_uninit(struct optee_supp *supp);
+void optee_supp_release(struct optee_supp *supp);
+
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param);
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param);
+
+u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg);
+int optee_open_session(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param);
+int optee_close_session(struct tee_context *ctx, u32 session);
+int optee_invoke_func(struct tee_context *ctx, struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param);
+int optee_cancel_req(struct tee_context *ctx, u32 cancel_id, u32 session);
+
+void optee_enable_shm_cache(struct optee *optee);
+void optee_disable_shm_cache(struct optee *optee);
+
+int optee_shm_register(struct tee_context *ctx, struct tee_shm *shm,
+ struct page **pages, size_t num_pages,
+ unsigned long start);
+int optee_shm_unregister(struct tee_context *ctx, struct tee_shm *shm);
+
+int optee_shm_register_supp(struct tee_context *ctx, struct tee_shm *shm,
+ struct page **pages, size_t num_pages,
+ unsigned long start);
+int optee_shm_unregister_supp(struct tee_context *ctx, struct tee_shm *shm);
+
+int optee_from_msg_param(struct tee_param *params, size_t num_params,
+ const struct optee_msg_param *msg_params);
+int optee_to_msg_param(struct optee_msg_param *msg_params, size_t num_params,
+ const struct tee_param *params);
+
+u64 *optee_allocate_pages_list(size_t num_entries);
+void optee_free_pages_list(void *array, size_t num_entries);
+void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages,
+ size_t page_offset);
+
+/*
+ * Small helpers
+ */
+
+static inline void *reg_pair_to_ptr(u32 reg0, u32 reg1)
+{
+ return (void *)(unsigned long)(((u64)reg0 << 32) | reg1);
+}
+
+static inline void reg_pair_from_64(u32 *reg0, u32 *reg1, u64 val)
+{
+ *reg0 = val >> 32;
+ *reg1 = val;
+}
+
+#endif /*OPTEE_PRIVATE_H*/
diff --git a/drivers/tee/optee/optee_smc.h b/drivers/tee/optee/optee_smc.h
new file mode 100644
index 0000000..7cd3272
--- /dev/null
+++ b/drivers/tee/optee/optee_smc.h
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef OPTEE_SMC_H
+#define OPTEE_SMC_H
+
+#include <linux/arm-smccc.h>
+#include <linux/bitops.h>
+
+#define OPTEE_SMC_STD_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+#define OPTEE_SMC_FAST_CALL_VAL(func_num) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS, (func_num))
+
+/*
+ * Function specified by SMC Calling convention.
+ */
+#define OPTEE_SMC_FUNCID_CALLS_COUNT 0xFF00
+#define OPTEE_SMC_CALLS_COUNT \
+ ARM_SMCCC_CALL_VAL(OPTEE_SMC_FAST_CALL, SMCCC_SMC_32, \
+ SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_COUNT)
+
+/*
+ * Normal cached memory (write-back), shareable for SMP systems and not
+ * shareable for UP systems.
+ */
+#define OPTEE_SMC_SHM_CACHED 1
+
+/*
+ * a0..a7 is used as register names in the descriptions below, on arm32
+ * that translates to r0..r7 and on arm64 to w0..w7. In both cases it's
+ * 32-bit registers.
+ */
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Return one of the following UIDs if using API specified in this file
+ * without further extentions:
+ * 65cb6b93-af0c-4617-8ed6-644a8d1140f8
+ * see also OPTEE_SMC_UID_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_UID OPTEE_MSG_FUNCID_CALLS_UID
+#define OPTEE_SMC_CALLS_UID \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_UID)
+
+/*
+ * Function specified by SMC Calling convention
+ *
+ * Returns 2.0 if using API specified in this file without further extentions.
+ * see also OPTEE_MSG_REVISION_* in optee_msg.h
+ */
+#define OPTEE_SMC_FUNCID_CALLS_REVISION OPTEE_MSG_FUNCID_CALLS_REVISION
+#define OPTEE_SMC_CALLS_REVISION \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_TRUSTED_OS_END, \
+ OPTEE_SMC_FUNCID_CALLS_REVISION)
+
+struct optee_smc_calls_revision_result {
+ unsigned long major;
+ unsigned long minor;
+ unsigned long reserved0;
+ unsigned long reserved1;
+};
+
+/*
+ * Get UUID of Trusted OS.
+ *
+ * Used by non-secure world to figure out which Trusted OS is installed.
+ * Note that returned UUID is the UUID of the Trusted OS, not of the API.
+ *
+ * Returns UUID in a0-4 in the same way as OPTEE_SMC_CALLS_UID
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_UUID OPTEE_MSG_FUNCID_GET_OS_UUID
+#define OPTEE_SMC_CALL_GET_OS_UUID \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_UUID)
+
+/*
+ * Get revision of Trusted OS.
+ *
+ * Used by non-secure world to figure out which version of the Trusted OS
+ * is installed. Note that the returned revision is the revision of the
+ * Trusted OS, not of the API.
+ *
+ * Returns revision in a0-1 in the same way as OPTEE_SMC_CALLS_REVISION
+ * described above.
+ */
+#define OPTEE_SMC_FUNCID_GET_OS_REVISION OPTEE_MSG_FUNCID_GET_OS_REVISION
+#define OPTEE_SMC_CALL_GET_OS_REVISION \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_OS_REVISION)
+
+/*
+ * Call with struct optee_msg_arg as argument
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC*CALL_WITH_ARG
+ * a1 Upper 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a2 Lower 32bit of a 64bit physical pointer to a struct optee_msg_arg
+ * a3 Cache settings, not used if physical pointer is in a predefined shared
+ * memory area else per OPTEE_SMC_SHM_*
+ * a4-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_*
+ * a1-3 Not used
+ * a4-7 Preserved
+ *
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_ETHREAD_LIMIT
+ * a1-3 Preserved
+ * a4-7 Preserved
+ *
+ * RPC return register usage:
+ * a0 Return value, OPTEE_SMC_RETURN_IS_RPC(val)
+ * a1-2 RPC parameters
+ * a3-7 Resume information, must be preserved
+ *
+ * Possible return values:
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this
+ * function.
+ * OPTEE_SMC_RETURN_OK Call completed, result updated in
+ * the previously supplied struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_ETHREAD_LIMIT Number of Trusted OS threads exceeded,
+ * try again later.
+ * OPTEE_SMC_RETURN_EBADADDR Bad physcial pointer to struct
+ * optee_msg_arg.
+ * OPTEE_SMC_RETURN_EBADCMD Bad/unknown cmd in struct optee_msg_arg
+ * OPTEE_SMC_RETURN_IS_RPC() Call suspended by RPC call to normal
+ * world.
+ */
+#define OPTEE_SMC_FUNCID_CALL_WITH_ARG OPTEE_MSG_FUNCID_CALL_WITH_ARG
+#define OPTEE_SMC_CALL_WITH_ARG \
+ OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_CALL_WITH_ARG)
+
+/*
+ * Get Shared Memory Config
+ *
+ * Returns the Secure/Non-secure shared memory config.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_GET_SHM_CONFIG
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Have config return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 Physical address of start of SHM
+ * a2 Size of of SHM
+ * a3 Cache settings of memory, as defined by the
+ * OPTEE_SMC_SHM_* values above
+ * a4-7 Preserved
+ *
+ * Not available register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-3 Not used
+ * a4-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_GET_SHM_CONFIG 7
+#define OPTEE_SMC_GET_SHM_CONFIG \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_GET_SHM_CONFIG)
+
+struct optee_smc_get_shm_config_result {
+ unsigned long status;
+ unsigned long start;
+ unsigned long size;
+ unsigned long settings;
+};
+
+/*
+ * Exchanges capabilities between normal world and secure world
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_EXCHANGE_CAPABILITIES
+ * a1 bitfield of normal world capabilities OPTEE_SMC_NSEC_CAP_*
+ * a2-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ *
+ * Error return register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL, can't use the capabilities from normal world
+ * a1 bitfield of secure world capabilities OPTEE_SMC_SEC_CAP_*
+ * a2-7 Preserved
+ */
+/* Normal world works as a uniprocessor system */
+#define OPTEE_SMC_NSEC_CAP_UNIPROCESSOR BIT(0)
+/* Secure world has reserved shared memory for normal world to use */
+#define OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM BIT(0)
+/* Secure world can communicate via previously unregistered shared memory */
+#define OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM BIT(1)
+
+/*
+ * Secure world supports commands "register/unregister shared memory",
+ * secure world accepts command buffers located in any parts of non-secure RAM
+ */
+#define OPTEE_SMC_SEC_CAP_DYNAMIC_SHM BIT(2)
+
+#define OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES 9
+#define OPTEE_SMC_EXCHANGE_CAPABILITIES \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_EXCHANGE_CAPABILITIES)
+
+struct optee_smc_exchange_capabilities_result {
+ unsigned long status;
+ unsigned long capabilities;
+ unsigned long reserved0;
+ unsigned long reserved1;
+};
+
+/*
+ * Disable and empties cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns one shared memory reference to free. To disable the
+ * cache and free all cached objects this function has to be called until
+ * it returns OPTEE_SMC_RETURN_ENOTAVAIL.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_DISABLE_SHM_CACHE
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1 Upper 32bit of a 64bit Shared memory cookie
+ * a2 Lower 32bit of a 64bit Shared memory cookie
+ * a3-7 Preserved
+ *
+ * Cache empty return register usage:
+ * a0 OPTEE_SMC_RETURN_ENOTAVAIL
+ * a1-7 Preserved
+ *
+ * Not idle return register usage:
+ * a0 OPTEE_SMC_RETURN_EBUSY
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE 10
+#define OPTEE_SMC_DISABLE_SHM_CACHE \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_DISABLE_SHM_CACHE)
+
+struct optee_smc_disable_shm_cache_result {
+ unsigned long status;
+ unsigned long shm_upper32;
+ unsigned long shm_lower32;
+ unsigned long reserved0;
+};
+
+/*
+ * Enable cache of shared memory objects
+ *
+ * Secure world can cache frequently used shared memory objects, for
+ * example objects used as RPC arguments. When secure world is idle this
+ * function returns OPTEE_SMC_RETURN_OK and the cache is enabled. If
+ * secure world isn't idle OPTEE_SMC_RETURN_EBUSY is returned.
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_ENABLE_SHM_CACHE
+ * a1-6 Not used
+ * a7 Hypervisor Client ID register
+ *
+ * Normal return register usage:
+ * a0 OPTEE_SMC_RETURN_OK
+ * a1-7 Preserved
+ *
+ * Not idle return register usage:
+ * a0 OPTEE_SMC_RETURN_EBUSY
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE 11
+#define OPTEE_SMC_ENABLE_SHM_CACHE \
+ OPTEE_SMC_FAST_CALL_VAL(OPTEE_SMC_FUNCID_ENABLE_SHM_CACHE)
+
+/*
+ * Resume from RPC (for example after processing a foreign interrupt)
+ *
+ * Call register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC
+ * a1-3 Value of a1-3 when OPTEE_SMC_CALL_WITH_ARG returned
+ * OPTEE_SMC_RETURN_RPC in a0
+ *
+ * Return register usage is the same as for OPTEE_SMC_*CALL_WITH_ARG above.
+ *
+ * Possible return values
+ * OPTEE_SMC_RETURN_UNKNOWN_FUNCTION Trusted OS does not recognize this
+ * function.
+ * OPTEE_SMC_RETURN_OK Original call completed, result
+ * updated in the previously supplied.
+ * struct optee_msg_arg
+ * OPTEE_SMC_RETURN_RPC Call suspended by RPC call to normal
+ * world.
+ * OPTEE_SMC_RETURN_ERESUME Resume failed, the opaque resume
+ * information was corrupt.
+ */
+#define OPTEE_SMC_FUNCID_RETURN_FROM_RPC 3
+#define OPTEE_SMC_CALL_RETURN_FROM_RPC \
+ OPTEE_SMC_STD_CALL_VAL(OPTEE_SMC_FUNCID_RETURN_FROM_RPC)
+
+#define OPTEE_SMC_RETURN_RPC_PREFIX_MASK 0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_PREFIX 0xFFFF0000
+#define OPTEE_SMC_RETURN_RPC_FUNC_MASK 0x0000FFFF
+
+#define OPTEE_SMC_RETURN_GET_RPC_FUNC(ret) \
+ ((ret) & OPTEE_SMC_RETURN_RPC_FUNC_MASK)
+
+#define OPTEE_SMC_RPC_VAL(func) ((func) | OPTEE_SMC_RETURN_RPC_PREFIX)
+
+/*
+ * Allocate memory for RPC parameter passing. The memory is used to hold a
+ * struct optee_msg_arg.
+ *
+ * "Call" register usage:
+ * a0 This value, OPTEE_SMC_RETURN_RPC_ALLOC
+ * a1 Size in bytes of required argument memory
+ * a2 Not used
+ * a3 Resume information, must be preserved
+ * a4-5 Not used
+ * a6-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1 Upper 32bits of 64bit physical pointer to allocated
+ * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ * be allocated.
+ * a2 Lower 32bits of 64bit physical pointer to allocated
+ * memory, (a1 == 0 && a2 == 0) if size was 0 or if memory can't
+ * be allocated
+ * a3 Preserved
+ * a4 Upper 32bits of 64bit Shared memory cookie used when freeing
+ * the memory or doing an RPC
+ * a5 Lower 32bits of 64bit Shared memory cookie used when freeing
+ * the memory or doing an RPC
+ * a6-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_ALLOC 0
+#define OPTEE_SMC_RETURN_RPC_ALLOC \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_ALLOC)
+
+/*
+ * Free memory previously allocated by OPTEE_SMC_RETURN_RPC_ALLOC
+ *
+ * "Call" register usage:
+ * a0 This value, OPTEE_SMC_RETURN_RPC_FREE
+ * a1 Upper 32bits of 64bit shared memory cookie belonging to this
+ * argument memory
+ * a2 Lower 32bits of 64bit shared memory cookie belonging to this
+ * argument memory
+ * a3-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2 Not used
+ * a3-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_FREE 2
+#define OPTEE_SMC_RETURN_RPC_FREE \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FREE)
+
+/*
+ * Deliver foreign interrupt to normal world.
+ *
+ * "Call" register usage:
+ * a0 OPTEE_SMC_RETURN_RPC_FOREIGN_INTR
+ * a1-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_FOREIGN_INTR 4
+#define OPTEE_SMC_RETURN_RPC_FOREIGN_INTR \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_FOREIGN_INTR)
+
+/*
+ * Do an RPC request. The supplied struct optee_msg_arg tells which
+ * request to do and the parameters for the request. The following fields
+ * are used (the rest are unused):
+ * - cmd the Request ID
+ * - ret return value of the request, filled in by normal world
+ * - num_params number of parameters for the request
+ * - params the parameters
+ * - param_attrs attributes of the parameters
+ *
+ * "Call" register usage:
+ * a0 OPTEE_SMC_RETURN_RPC_CMD
+ * a1 Upper 32bit of a 64bit Shared memory cookie holding a
+ * struct optee_msg_arg, must be preserved, only the data should
+ * be updated
+ * a2 Lower 32bit of a 64bit Shared memory cookie holding a
+ * struct optee_msg_arg, must be preserved, only the data should
+ * be updated
+ * a3-7 Resume information, must be preserved
+ *
+ * "Return" register usage:
+ * a0 SMC Function ID, OPTEE_SMC_CALL_RETURN_FROM_RPC.
+ * a1-2 Not used
+ * a3-7 Preserved
+ */
+#define OPTEE_SMC_RPC_FUNC_CMD 5
+#define OPTEE_SMC_RETURN_RPC_CMD \
+ OPTEE_SMC_RPC_VAL(OPTEE_SMC_RPC_FUNC_CMD)
+
+/* Returned in a0 */
+#define OPTEE_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF
+
+/* Returned in a0 only from Trusted OS functions */
+#define OPTEE_SMC_RETURN_OK 0x0
+#define OPTEE_SMC_RETURN_ETHREAD_LIMIT 0x1
+#define OPTEE_SMC_RETURN_EBUSY 0x2
+#define OPTEE_SMC_RETURN_ERESUME 0x3
+#define OPTEE_SMC_RETURN_EBADADDR 0x4
+#define OPTEE_SMC_RETURN_EBADCMD 0x5
+#define OPTEE_SMC_RETURN_ENOMEM 0x6
+#define OPTEE_SMC_RETURN_ENOTAVAIL 0x7
+#define OPTEE_SMC_RETURN_IS_RPC(ret) __optee_smc_return_is_rpc((ret))
+
+static inline bool __optee_smc_return_is_rpc(u32 ret)
+{
+ return ret != OPTEE_SMC_RETURN_UNKNOWN_FUNCTION &&
+ (ret & OPTEE_SMC_RETURN_RPC_PREFIX_MASK) ==
+ OPTEE_SMC_RETURN_RPC_PREFIX;
+}
+
+#endif /* OPTEE_SMC_H */
diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c
new file mode 100644
index 0000000..41aea12
--- /dev/null
+++ b/drivers/tee/optee/rpc.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+
+struct wq_entry {
+ struct list_head link;
+ struct completion c;
+ u32 key;
+};
+
+void optee_wait_queue_init(struct optee_wait_queue *priv)
+{
+ mutex_init(&priv->mu);
+ INIT_LIST_HEAD(&priv->db);
+}
+
+void optee_wait_queue_exit(struct optee_wait_queue *priv)
+{
+ mutex_destroy(&priv->mu);
+}
+
+static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg)
+{
+ struct timespec64 ts;
+
+ if (arg->num_params != 1)
+ goto bad;
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT)
+ goto bad;
+
+ getnstimeofday64(&ts);
+ arg->params[0].u.value.a = ts.tv_sec;
+ arg->params[0].u.value.b = ts.tv_nsec;
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static struct wq_entry *wq_entry_get(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w;
+
+ mutex_lock(&wq->mu);
+
+ list_for_each_entry(w, &wq->db, link)
+ if (w->key == key)
+ goto out;
+
+ w = kmalloc(sizeof(*w), GFP_KERNEL);
+ if (w) {
+ init_completion(&w->c);
+ w->key = key;
+ list_add_tail(&w->link, &wq->db);
+ }
+out:
+ mutex_unlock(&wq->mu);
+ return w;
+}
+
+static void wq_sleep(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w) {
+ wait_for_completion(&w->c);
+ mutex_lock(&wq->mu);
+ list_del(&w->link);
+ mutex_unlock(&wq->mu);
+ kfree(w);
+ }
+}
+
+static void wq_wakeup(struct optee_wait_queue *wq, u32 key)
+{
+ struct wq_entry *w = wq_entry_get(wq, key);
+
+ if (w)
+ complete(&w->c);
+}
+
+static void handle_rpc_func_cmd_wq(struct optee *optee,
+ struct optee_msg_arg *arg)
+{
+ if (arg->num_params != 1)
+ goto bad;
+
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_WAIT_QUEUE_SLEEP:
+ wq_sleep(&optee->wait_queue, arg->params[0].u.value.b);
+ break;
+ case OPTEE_MSG_RPC_WAIT_QUEUE_WAKEUP:
+ wq_wakeup(&optee->wait_queue, arg->params[0].u.value.b);
+ break;
+ default:
+ goto bad;
+ }
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_func_cmd_wait(struct optee_msg_arg *arg)
+{
+ u32 msec_to_wait;
+
+ if (arg->num_params != 1)
+ goto bad;
+
+ if ((arg->params[0].attr & OPTEE_MSG_ATTR_TYPE_MASK) !=
+ OPTEE_MSG_ATTR_TYPE_VALUE_INPUT)
+ goto bad;
+
+ msec_to_wait = arg->params[0].u.value.a;
+
+ /* Go to interruptible sleep */
+ msleep_interruptible(msec_to_wait);
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+}
+
+static void handle_rpc_supp_cmd(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_param *params;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ params = kmalloc_array(arg->num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (optee_from_msg_param(params, arg->num_params, arg->params)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto out;
+ }
+
+ arg->ret = optee_supp_thrd_req(ctx, arg->cmd, arg->num_params, params);
+
+ if (optee_to_msg_param(arg->params, arg->num_params, params))
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+out:
+ kfree(params);
+}
+
+static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz)
+{
+ u32 ret;
+ struct tee_param param;
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct tee_shm *shm;
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = sz;
+ param.u.value.c = 0;
+
+ ret = optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_ALLOC, 1, ¶m);
+ if (ret)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&optee->supp.mutex);
+ /* Increases count as secure world doesn't have a reference */
+ shm = tee_shm_get_from_id(optee->supp.ctx, param.u.value.c);
+ mutex_unlock(&optee->supp.mutex);
+ return shm;
+}
+
+static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx,
+ struct optee_msg_arg *arg,
+ struct optee_call_ctx *call_ctx)
+{
+ phys_addr_t pa;
+ struct tee_shm *shm;
+ size_t sz;
+ size_t n;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (!arg->num_params ||
+ arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ for (n = 1; n < arg->num_params; n++) {
+ if (arg->params[n].attr != OPTEE_MSG_ATTR_TYPE_NONE) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+ }
+
+ sz = arg->params[0].u.value.b;
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ shm = cmd_alloc_suppl(ctx, sz);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ if (IS_ERR(shm)) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ if (tee_shm_get_pa(shm, 0, &pa)) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ goto bad;
+ }
+
+ sz = tee_shm_get_size(shm);
+
+ if (tee_shm_is_registered(shm)) {
+ struct page **pages;
+ u64 *pages_list;
+ size_t page_num;
+
+ pages = tee_shm_get_pages(shm, &page_num);
+ if (!pages || !page_num) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ goto bad;
+ }
+
+ pages_list = optee_allocate_pages_list(page_num);
+ if (!pages_list) {
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+ goto bad;
+ }
+
+ call_ctx->pages_list = pages_list;
+ call_ctx->num_entries = page_num;
+
+ arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT |
+ OPTEE_MSG_ATTR_NONCONTIG;
+ /*
+ * In the least bits of u.tmem.buf_ptr we store buffer offset
+ * from 4k page, as described in OP-TEE ABI.
+ */
+ arg->params[0].u.tmem.buf_ptr = virt_to_phys(pages_list) |
+ (tee_shm_get_page_offset(shm) &
+ (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1));
+ arg->params[0].u.tmem.size = tee_shm_get_size(shm);
+ arg->params[0].u.tmem.shm_ref = (unsigned long)shm;
+
+ optee_fill_pages_list(pages_list, pages, page_num,
+ tee_shm_get_page_offset(shm));
+ } else {
+ arg->params[0].attr = OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT;
+ arg->params[0].u.tmem.buf_ptr = pa;
+ arg->params[0].u.tmem.size = sz;
+ arg->params[0].u.tmem.shm_ref = (unsigned long)shm;
+ }
+
+ arg->ret = TEEC_SUCCESS;
+ return;
+bad:
+ tee_shm_free(shm);
+}
+
+static void cmd_free_suppl(struct tee_context *ctx, struct tee_shm *shm)
+{
+ struct tee_param param;
+
+ param.attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT;
+ param.u.value.a = OPTEE_MSG_RPC_SHM_TYPE_APPL;
+ param.u.value.b = tee_shm_get_id(shm);
+ param.u.value.c = 0;
+
+ /*
+ * Match the tee_shm_get_from_id() in cmd_alloc_suppl() as secure
+ * world has released its reference.
+ *
+ * It's better to do this before sending the request to supplicant
+ * as we'd like to let the process doing the initial allocation to
+ * do release the last reference too in order to avoid stacking
+ * many pending fput() on the client process. This could otherwise
+ * happen if secure world does many allocate and free in a single
+ * invoke.
+ */
+ tee_shm_put(shm);
+
+ optee_supp_thrd_req(ctx, OPTEE_MSG_RPC_CMD_SHM_FREE, 1, ¶m);
+}
+
+static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx,
+ struct optee_msg_arg *arg)
+{
+ struct tee_shm *shm;
+
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+
+ if (arg->num_params != 1 ||
+ arg->params[0].attr != OPTEE_MSG_ATTR_TYPE_VALUE_INPUT) {
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ return;
+ }
+
+ shm = (struct tee_shm *)(unsigned long)arg->params[0].u.value.b;
+ switch (arg->params[0].u.value.a) {
+ case OPTEE_MSG_RPC_SHM_TYPE_APPL:
+ cmd_free_suppl(ctx, shm);
+ break;
+ case OPTEE_MSG_RPC_SHM_TYPE_KERNEL:
+ tee_shm_free(shm);
+ break;
+ default:
+ arg->ret = TEEC_ERROR_BAD_PARAMETERS;
+ }
+ arg->ret = TEEC_SUCCESS;
+}
+
+static void free_pages_list(struct optee_call_ctx *call_ctx)
+{
+ if (call_ctx->pages_list) {
+ optee_free_pages_list(call_ctx->pages_list,
+ call_ctx->num_entries);
+ call_ctx->pages_list = NULL;
+ call_ctx->num_entries = 0;
+ }
+}
+
+void optee_rpc_finalize_call(struct optee_call_ctx *call_ctx)
+{
+ free_pages_list(call_ctx);
+}
+
+static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee,
+ struct tee_shm *shm,
+ struct optee_call_ctx *call_ctx)
+{
+ struct optee_msg_arg *arg;
+
+ arg = tee_shm_get_va(shm, 0);
+ if (IS_ERR(arg)) {
+ pr_err("%s: tee_shm_get_va %p failed\n", __func__, shm);
+ return;
+ }
+
+ switch (arg->cmd) {
+ case OPTEE_MSG_RPC_CMD_GET_TIME:
+ handle_rpc_func_cmd_get_time(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_WAIT_QUEUE:
+ handle_rpc_func_cmd_wq(optee, arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SUSPEND:
+ handle_rpc_func_cmd_wait(arg);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_ALLOC:
+ free_pages_list(call_ctx);
+ handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx);
+ break;
+ case OPTEE_MSG_RPC_CMD_SHM_FREE:
+ handle_rpc_func_cmd_shm_free(ctx, arg);
+ break;
+ default:
+ handle_rpc_supp_cmd(ctx, arg);
+ }
+}
+
+/**
+ * optee_handle_rpc() - handle RPC from secure world
+ * @ctx: context doing the RPC
+ * @param: value of registers for the RPC
+ * @call_ctx: call context. Preserved during one OP-TEE invocation
+ *
+ * Result of RPC is written back into @param.
+ */
+void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param,
+ struct optee_call_ctx *call_ctx)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct tee_shm *shm;
+ phys_addr_t pa;
+
+ switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) {
+ case OPTEE_SMC_RPC_FUNC_ALLOC:
+ shm = tee_shm_alloc(ctx, param->a1, TEE_SHM_MAPPED);
+ if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) {
+ reg_pair_from_64(¶m->a1, ¶m->a2, pa);
+ reg_pair_from_64(¶m->a4, ¶m->a5,
+ (unsigned long)shm);
+ } else {
+ param->a1 = 0;
+ param->a2 = 0;
+ param->a4 = 0;
+ param->a5 = 0;
+ }
+ break;
+ case OPTEE_SMC_RPC_FUNC_FREE:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ tee_shm_free(shm);
+ break;
+ case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
+ /*
+ * A foreign interrupt was raised while secure world was
+ * executing, since they are handled in Linux a dummy RPC is
+ * performed to let Linux take the interrupt through the normal
+ * vector.
+ */
+ break;
+ case OPTEE_SMC_RPC_FUNC_CMD:
+ shm = reg_pair_to_ptr(param->a1, param->a2);
+ handle_rpc_func_cmd(ctx, optee, shm, call_ctx);
+ break;
+ default:
+ pr_warn("Unknown RPC func 0x%x\n",
+ (u32)OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0));
+ break;
+ }
+
+ param->a0 = OPTEE_SMC_CALL_RETURN_FROM_RPC;
+}
diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c
new file mode 100644
index 0000000..4939781
--- /dev/null
+++ b/drivers/tee/optee/shm_pool.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ * Copyright (c) 2017, EPAM Systems
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "optee_private.h"
+#include "optee_smc.h"
+#include "shm_pool.h"
+
+static int pool_op_alloc(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm, size_t size)
+{
+ unsigned int order = get_order(size);
+ struct page *page;
+
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!page)
+ return -ENOMEM;
+
+ shm->kaddr = page_address(page);
+ shm->paddr = page_to_phys(page);
+ shm->size = PAGE_SIZE << order;
+
+ return 0;
+}
+
+static void pool_op_free(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm)
+{
+ free_pages((unsigned long)shm->kaddr, get_order(shm->size));
+ shm->kaddr = NULL;
+}
+
+static void pool_op_destroy_poolmgr(struct tee_shm_pool_mgr *poolm)
+{
+ kfree(poolm);
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops = {
+ .alloc = pool_op_alloc,
+ .free = pool_op_free,
+ .destroy_poolmgr = pool_op_destroy_poolmgr,
+};
+
+/**
+ * optee_shm_pool_alloc_pages() - create page-based allocator pool
+ *
+ * This pool is used when OP-TEE supports dymanic SHM. In this case
+ * command buffers and such are allocated from kernel's own memory.
+ */
+struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void)
+{
+ struct tee_shm_pool_mgr *mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+
+ if (!mgr)
+ return ERR_PTR(-ENOMEM);
+
+ mgr->ops = &pool_ops;
+
+ return mgr;
+}
diff --git a/drivers/tee/optee/shm_pool.h b/drivers/tee/optee/shm_pool.h
new file mode 100644
index 0000000..4e753c3
--- /dev/null
+++ b/drivers/tee/optee/shm_pool.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ * Copyright (c) 2016, EPAM Systems
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef SHM_POOL_H
+#define SHM_POOL_H
+
+#include <linux/tee_drv.h>
+
+struct tee_shm_pool_mgr *optee_shm_pool_alloc_pages(void);
+
+#endif
diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
new file mode 100644
index 0000000..df35fc0
--- /dev/null
+++ b/drivers/tee/optee/supp.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "optee_private.h"
+
+struct optee_supp_req {
+ struct list_head link;
+
+ bool busy;
+ u32 func;
+ u32 ret;
+ size_t num_params;
+ struct tee_param *param;
+
+ struct completion c;
+};
+
+void optee_supp_init(struct optee_supp *supp)
+{
+ memset(supp, 0, sizeof(*supp));
+ mutex_init(&supp->mutex);
+ init_completion(&supp->reqs_c);
+ idr_init(&supp->idr);
+ INIT_LIST_HEAD(&supp->reqs);
+ supp->req_id = -1;
+}
+
+void optee_supp_uninit(struct optee_supp *supp)
+{
+ mutex_destroy(&supp->mutex);
+ idr_destroy(&supp->idr);
+}
+
+void optee_supp_release(struct optee_supp *supp)
+{
+ int id;
+ struct optee_supp_req *req;
+ struct optee_supp_req *req_tmp;
+
+ mutex_lock(&supp->mutex);
+
+ /* Abort all request retrieved by supplicant */
+ idr_for_each_entry(&supp->idr, req, id) {
+ req->busy = false;
+ idr_remove(&supp->idr, id);
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ complete(&req->c);
+ }
+
+ /* Abort all queued requests */
+ list_for_each_entry_safe(req, req_tmp, &supp->reqs, link) {
+ list_del(&req->link);
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ complete(&req->c);
+ }
+
+ supp->ctx = NULL;
+ supp->req_id = -1;
+
+ mutex_unlock(&supp->mutex);
+}
+
+/**
+ * optee_supp_thrd_req() - request service from supplicant
+ * @ctx: context doing the request
+ * @func: function requested
+ * @num_params: number of elements in @param array
+ * @param: parameters for function
+ *
+ * Returns result of operation to be passed to secure world
+ */
+u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
+ struct tee_param *param)
+
+{
+ struct optee *optee = tee_get_drvdata(ctx->teedev);
+ struct optee_supp *supp = &optee->supp;
+ struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL);
+ bool interruptable;
+ u32 ret;
+
+ if (!req)
+ return TEEC_ERROR_OUT_OF_MEMORY;
+
+ init_completion(&req->c);
+ req->func = func;
+ req->num_params = num_params;
+ req->param = param;
+
+ /* Insert the request in the request list */
+ mutex_lock(&supp->mutex);
+ list_add_tail(&req->link, &supp->reqs);
+ mutex_unlock(&supp->mutex);
+
+ /* Tell an eventual waiter there's a new request */
+ complete(&supp->reqs_c);
+
+ /*
+ * Wait for supplicant to process and return result, once we've
+ * returned from wait_for_completion(&req->c) successfully we have
+ * exclusive access again.
+ */
+ while (wait_for_completion_interruptible(&req->c)) {
+ mutex_lock(&supp->mutex);
+ interruptable = !supp->ctx;
+ if (interruptable) {
+ /*
+ * There's no supplicant available and since the
+ * supp->mutex currently is held none can
+ * become available until the mutex released
+ * again.
+ *
+ * Interrupting an RPC to supplicant is only
+ * allowed as a way of slightly improving the user
+ * experience in case the supplicant hasn't been
+ * started yet. During normal operation the supplicant
+ * will serve all requests in a timely manner and
+ * interrupting then wouldn't make sense.
+ */
+ interruptable = !req->busy;
+ if (!req->busy)
+ list_del(&req->link);
+ }
+ mutex_unlock(&supp->mutex);
+
+ if (interruptable) {
+ req->ret = TEEC_ERROR_COMMUNICATION;
+ break;
+ }
+ }
+
+ ret = req->ret;
+ kfree(req);
+
+ return ret;
+}
+
+static struct optee_supp_req *supp_pop_entry(struct optee_supp *supp,
+ int num_params, int *id)
+{
+ struct optee_supp_req *req;
+
+ if (supp->req_id != -1) {
+ /*
+ * Supplicant should not mix synchronous and asnynchronous
+ * requests.
+ */
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (list_empty(&supp->reqs))
+ return NULL;
+
+ req = list_first_entry(&supp->reqs, struct optee_supp_req, link);
+
+ if (num_params < req->num_params) {
+ /* Not enough room for parameters */
+ return ERR_PTR(-EINVAL);
+ }
+
+ *id = idr_alloc(&supp->idr, req, 1, 0, GFP_KERNEL);
+ if (*id < 0)
+ return ERR_PTR(-ENOMEM);
+
+ list_del(&req->link);
+ req->busy = true;
+
+ return req;
+}
+
+static int supp_check_recv_params(size_t num_params, struct tee_param *params,
+ size_t *num_meta)
+{
+ size_t n;
+
+ if (!num_params)
+ return -EINVAL;
+
+ /*
+ * If there's memrefs we need to decrease those as they where
+ * increased earlier and we'll even refuse to accept any below.
+ */
+ for (n = 0; n < num_params; n++)
+ if (tee_param_is_memref(params + n) && params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+
+ /*
+ * We only expect parameters as TEE_IOCTL_PARAM_ATTR_TYPE_NONE with
+ * or without the TEE_IOCTL_PARAM_ATTR_META bit set.
+ */
+ for (n = 0; n < num_params; n++)
+ if (params[n].attr &&
+ params[n].attr != TEE_IOCTL_PARAM_ATTR_META)
+ return -EINVAL;
+
+ /* At most we'll need one meta parameter so no need to check for more */
+ if (params->attr == TEE_IOCTL_PARAM_ATTR_META)
+ *num_meta = 1;
+ else
+ *num_meta = 0;
+
+ return 0;
+}
+
+/**
+ * optee_supp_recv() - receive request for supplicant
+ * @ctx: context receiving the request
+ * @func: requested function in supplicant
+ * @num_params: number of elements allocated in @param, updated with number
+ * used elements
+ * @param: space for parameters for @func
+ *
+ * Returns 0 on success or <0 on failure
+ */
+int optee_supp_recv(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct optee_supp *supp = &optee->supp;
+ struct optee_supp_req *req = NULL;
+ int id;
+ size_t num_meta;
+ int rc;
+
+ rc = supp_check_recv_params(*num_params, param, &num_meta);
+ if (rc)
+ return rc;
+
+ while (true) {
+ mutex_lock(&supp->mutex);
+ req = supp_pop_entry(supp, *num_params - num_meta, &id);
+ mutex_unlock(&supp->mutex);
+
+ if (req) {
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ break;
+ }
+
+ /*
+ * If we didn't get a request we'll block in
+ * wait_for_completion() to avoid needless spinning.
+ *
+ * This is where supplicant will be hanging most of
+ * the time, let's make this interruptable so we
+ * can easily restart supplicant if needed.
+ */
+ if (wait_for_completion_interruptible(&supp->reqs_c))
+ return -ERESTARTSYS;
+ }
+
+ if (num_meta) {
+ /*
+ * tee-supplicant support meta parameters -> requsts can be
+ * processed asynchronously.
+ */
+ param->attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT |
+ TEE_IOCTL_PARAM_ATTR_META;
+ param->u.value.a = id;
+ param->u.value.b = 0;
+ param->u.value.c = 0;
+ } else {
+ mutex_lock(&supp->mutex);
+ supp->req_id = id;
+ mutex_unlock(&supp->mutex);
+ }
+
+ *func = req->func;
+ *num_params = req->num_params + num_meta;
+ memcpy(param + num_meta, req->param,
+ sizeof(struct tee_param) * req->num_params);
+
+ return 0;
+}
+
+static struct optee_supp_req *supp_pop_req(struct optee_supp *supp,
+ size_t num_params,
+ struct tee_param *param,
+ size_t *num_meta)
+{
+ struct optee_supp_req *req;
+ int id;
+ size_t nm;
+ const u32 attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT |
+ TEE_IOCTL_PARAM_ATTR_META;
+
+ if (!num_params)
+ return ERR_PTR(-EINVAL);
+
+ if (supp->req_id == -1) {
+ if (param->attr != attr)
+ return ERR_PTR(-EINVAL);
+ id = param->u.value.a;
+ nm = 1;
+ } else {
+ id = supp->req_id;
+ nm = 0;
+ }
+
+ req = idr_find(&supp->idr, id);
+ if (!req)
+ return ERR_PTR(-ENOENT);
+
+ if ((num_params - nm) != req->num_params)
+ return ERR_PTR(-EINVAL);
+
+ req->busy = false;
+ idr_remove(&supp->idr, id);
+ supp->req_id = -1;
+ *num_meta = nm;
+
+ return req;
+}
+
+/**
+ * optee_supp_send() - send result of request from supplicant
+ * @ctx: context sending result
+ * @ret: return value of request
+ * @num_params: number of parameters returned
+ * @param: returned parameters
+ *
+ * Returns 0 on success or <0 on failure.
+ */
+int optee_supp_send(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param)
+{
+ struct tee_device *teedev = ctx->teedev;
+ struct optee *optee = tee_get_drvdata(teedev);
+ struct optee_supp *supp = &optee->supp;
+ struct optee_supp_req *req;
+ size_t n;
+ size_t num_meta;
+
+ mutex_lock(&supp->mutex);
+ req = supp_pop_req(supp, num_params, param, &num_meta);
+ mutex_unlock(&supp->mutex);
+
+ if (IS_ERR(req)) {
+ /* Something is wrong, let supplicant restart. */
+ return PTR_ERR(req);
+ }
+
+ /* Update out and in/out parameters */
+ for (n = 0; n < req->num_params; n++) {
+ struct tee_param *p = req->param + n;
+
+ switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ p->u.value.a = param[n + num_meta].u.value.a;
+ p->u.value.b = param[n + num_meta].u.value.b;
+ p->u.value.c = param[n + num_meta].u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ p->u.memref.size = param[n + num_meta].u.memref.size;
+ break;
+ default:
+ break;
+ }
+ }
+ req->ret = ret;
+
+ /* Let the requesting thread continue */
+ complete(&req->c);
+
+ return 0;
+}
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
new file mode 100644
index 0000000..6c4b200
--- /dev/null
+++ b/drivers/tee/tee_core.c
@@ -0,0 +1,949 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include <linux/uaccess.h>
+#include "tee_private.h"
+
+#define TEE_NUM_DEVICES 32
+
+#define TEE_IOCTL_PARAM_SIZE(x) (sizeof(struct tee_param) * (x))
+
+/*
+ * Unprivileged devices in the lower half range and privileged devices in
+ * the upper half range.
+ */
+static DECLARE_BITMAP(dev_mask, TEE_NUM_DEVICES);
+static DEFINE_SPINLOCK(driver_lock);
+
+static struct class *tee_class;
+static dev_t tee_devt;
+
+static int tee_open(struct inode *inode, struct file *filp)
+{
+ int rc;
+ struct tee_device *teedev;
+ struct tee_context *ctx;
+
+ teedev = container_of(inode->i_cdev, struct tee_device, cdev);
+ if (!tee_device_get(teedev))
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ kref_init(&ctx->refcount);
+ ctx->teedev = teedev;
+ INIT_LIST_HEAD(&ctx->list_shm);
+ filp->private_data = ctx;
+ rc = teedev->desc->ops->open(ctx);
+ if (rc)
+ goto err;
+
+ return 0;
+err:
+ kfree(ctx);
+ tee_device_put(teedev);
+ return rc;
+}
+
+void teedev_ctx_get(struct tee_context *ctx)
+{
+ if (ctx->releasing)
+ return;
+
+ kref_get(&ctx->refcount);
+}
+
+static void teedev_ctx_release(struct kref *ref)
+{
+ struct tee_context *ctx = container_of(ref, struct tee_context,
+ refcount);
+ ctx->releasing = true;
+ ctx->teedev->desc->ops->release(ctx);
+ kfree(ctx);
+}
+
+void teedev_ctx_put(struct tee_context *ctx)
+{
+ if (ctx->releasing)
+ return;
+
+ kref_put(&ctx->refcount, teedev_ctx_release);
+}
+
+static void teedev_close_context(struct tee_context *ctx)
+{
+ tee_device_put(ctx->teedev);
+ teedev_ctx_put(ctx);
+}
+
+static int tee_release(struct inode *inode, struct file *filp)
+{
+ teedev_close_context(filp->private_data);
+ return 0;
+}
+
+static int tee_ioctl_version(struct tee_context *ctx,
+ struct tee_ioctl_version_data __user *uvers)
+{
+ struct tee_ioctl_version_data vers;
+
+ ctx->teedev->desc->ops->get_version(ctx->teedev, &vers);
+
+ if (ctx->teedev->desc->flags & TEE_DESC_PRIVILEGED)
+ vers.gen_caps |= TEE_GEN_CAP_PRIVILEGED;
+
+ if (copy_to_user(uvers, &vers, sizeof(vers)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int tee_ioctl_shm_alloc(struct tee_context *ctx,
+ struct tee_ioctl_shm_alloc_data __user *udata)
+{
+ long ret;
+ struct tee_ioctl_shm_alloc_data data;
+ struct tee_shm *shm;
+
+ if (copy_from_user(&data, udata, sizeof(data)))
+ return -EFAULT;
+
+ /* Currently no input flags are supported */
+ if (data.flags)
+ return -EINVAL;
+
+ shm = tee_shm_alloc(ctx, data.size, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ data.id = shm->id;
+ data.flags = shm->flags;
+ data.size = shm->size;
+
+ if (copy_to_user(udata, &data, sizeof(data)))
+ ret = -EFAULT;
+ else
+ ret = tee_shm_get_fd(shm);
+
+ /*
+ * When user space closes the file descriptor the shared memory
+ * should be freed or if tee_shm_get_fd() failed then it will
+ * be freed immediately.
+ */
+ tee_shm_put(shm);
+ return ret;
+}
+
+static int
+tee_ioctl_shm_register(struct tee_context *ctx,
+ struct tee_ioctl_shm_register_data __user *udata)
+{
+ long ret;
+ struct tee_ioctl_shm_register_data data;
+ struct tee_shm *shm;
+
+ if (copy_from_user(&data, udata, sizeof(data)))
+ return -EFAULT;
+
+ /* Currently no input flags are supported */
+ if (data.flags)
+ return -EINVAL;
+
+ shm = tee_shm_register(ctx, data.addr, data.length,
+ TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ data.id = shm->id;
+ data.flags = shm->flags;
+ data.length = shm->size;
+
+ if (copy_to_user(udata, &data, sizeof(data)))
+ ret = -EFAULT;
+ else
+ ret = tee_shm_get_fd(shm);
+ /*
+ * When user space closes the file descriptor the shared memory
+ * should be freed or if tee_shm_get_fd() failed then it will
+ * be freed immediately.
+ */
+ tee_shm_put(shm);
+ return ret;
+}
+
+static int params_from_user(struct tee_context *ctx, struct tee_param *params,
+ size_t num_params,
+ struct tee_ioctl_param __user *uparams)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_shm *shm;
+ struct tee_ioctl_param ip;
+
+ if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+ return -EFAULT;
+
+ /* All unused attribute bits has to be zero */
+ if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK)
+ return -EINVAL;
+
+ params[n].attr = ip.attr;
+ switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_NONE:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ params[n].u.value.a = ip.a;
+ params[n].u.value.b = ip.b;
+ params[n].u.value.c = ip.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ /*
+ * If we fail to get a pointer to a shared memory
+ * object (and increase the ref count) from an
+ * identifier we return an error. All pointers that
+ * has been added in params have an increased ref
+ * count. It's the callers responibility to do
+ * tee_shm_put() on all resolved pointers.
+ */
+ shm = tee_shm_get_from_id(ctx, ip.c);
+ if (IS_ERR(shm))
+ return PTR_ERR(shm);
+
+ params[n].u.memref.shm_offs = ip.a;
+ params[n].u.memref.size = ip.b;
+ params[n].u.memref.shm = shm;
+ break;
+ default:
+ /* Unknown attribute */
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int params_to_user(struct tee_ioctl_param __user *uparams,
+ size_t num_params, struct tee_param *params)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_ioctl_param __user *up = uparams + n;
+ struct tee_param *p = params + n;
+
+ switch (p->attr) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ if (put_user(p->u.value.a, &up->a) ||
+ put_user(p->u.value.b, &up->b) ||
+ put_user(p->u.value.c, &up->c))
+ return -EFAULT;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ if (put_user((u64)p->u.memref.size, &up->b))
+ return -EFAULT;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+static int tee_ioctl_open_session(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ size_t n;
+ struct tee_ioctl_buf_data buf;
+ struct tee_ioctl_open_session_arg __user *uarg;
+ struct tee_ioctl_open_session_arg arg;
+ struct tee_ioctl_param __user *uparams = NULL;
+ struct tee_param *params = NULL;
+ bool have_session = false;
+
+ if (!ctx->teedev->desc->ops->open_session)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_ioctl_open_session_arg))
+ return -EINVAL;
+
+ uarg = u64_to_user_ptr(buf.buf_ptr);
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+ return -EINVAL;
+
+ if (arg.num_params) {
+ params = kcalloc(arg.num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+ uparams = uarg->params;
+ rc = params_from_user(ctx, params, arg.num_params, uparams);
+ if (rc)
+ goto out;
+ }
+
+ rc = ctx->teedev->desc->ops->open_session(ctx, &arg, params);
+ if (rc)
+ goto out;
+ have_session = true;
+
+ if (put_user(arg.session, &uarg->session) ||
+ put_user(arg.ret, &uarg->ret) ||
+ put_user(arg.ret_origin, &uarg->ret_origin)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rc = params_to_user(uparams, arg.num_params, params);
+out:
+ /*
+ * If we've succeeded to open the session but failed to communicate
+ * it back to user space, close the session again to avoid leakage.
+ */
+ if (rc && have_session && ctx->teedev->desc->ops->close_session)
+ ctx->teedev->desc->ops->close_session(ctx, arg.session);
+
+ if (params) {
+ /* Decrease ref count for all valid shared memory pointers */
+ for (n = 0; n < arg.num_params; n++)
+ if (tee_param_is_memref(params + n) &&
+ params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+ kfree(params);
+ }
+
+ return rc;
+}
+
+static int tee_ioctl_invoke(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ size_t n;
+ struct tee_ioctl_buf_data buf;
+ struct tee_ioctl_invoke_arg __user *uarg;
+ struct tee_ioctl_invoke_arg arg;
+ struct tee_ioctl_param __user *uparams = NULL;
+ struct tee_param *params = NULL;
+
+ if (!ctx->teedev->desc->ops->invoke_func)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_ioctl_invoke_arg))
+ return -EINVAL;
+
+ uarg = u64_to_user_ptr(buf.buf_ptr);
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ if (sizeof(arg) + TEE_IOCTL_PARAM_SIZE(arg.num_params) != buf.buf_len)
+ return -EINVAL;
+
+ if (arg.num_params) {
+ params = kcalloc(arg.num_params, sizeof(struct tee_param),
+ GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+ uparams = uarg->params;
+ rc = params_from_user(ctx, params, arg.num_params, uparams);
+ if (rc)
+ goto out;
+ }
+
+ rc = ctx->teedev->desc->ops->invoke_func(ctx, &arg, params);
+ if (rc)
+ goto out;
+
+ if (put_user(arg.ret, &uarg->ret) ||
+ put_user(arg.ret_origin, &uarg->ret_origin)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ rc = params_to_user(uparams, arg.num_params, params);
+out:
+ if (params) {
+ /* Decrease ref count for all valid shared memory pointers */
+ for (n = 0; n < arg.num_params; n++)
+ if (tee_param_is_memref(params + n) &&
+ params[n].u.memref.shm)
+ tee_shm_put(params[n].u.memref.shm);
+ kfree(params);
+ }
+ return rc;
+}
+
+static int tee_ioctl_cancel(struct tee_context *ctx,
+ struct tee_ioctl_cancel_arg __user *uarg)
+{
+ struct tee_ioctl_cancel_arg arg;
+
+ if (!ctx->teedev->desc->ops->cancel_req)
+ return -EINVAL;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ return ctx->teedev->desc->ops->cancel_req(ctx, arg.cancel_id,
+ arg.session);
+}
+
+static int
+tee_ioctl_close_session(struct tee_context *ctx,
+ struct tee_ioctl_close_session_arg __user *uarg)
+{
+ struct tee_ioctl_close_session_arg arg;
+
+ if (!ctx->teedev->desc->ops->close_session)
+ return -EINVAL;
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ return ctx->teedev->desc->ops->close_session(ctx, arg.session);
+}
+
+static int params_to_supp(struct tee_context *ctx,
+ struct tee_ioctl_param __user *uparams,
+ size_t num_params, struct tee_param *params)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_ioctl_param ip;
+ struct tee_param *p = params + n;
+
+ ip.attr = p->attr;
+ switch (p->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ ip.a = p->u.value.a;
+ ip.b = p->u.value.b;
+ ip.c = p->u.value.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ ip.b = p->u.memref.size;
+ if (!p->u.memref.shm) {
+ ip.a = 0;
+ ip.c = (u64)-1; /* invalid shm id */
+ break;
+ }
+ ip.a = p->u.memref.shm_offs;
+ ip.c = p->u.memref.shm->id;
+ break;
+ default:
+ ip.a = 0;
+ ip.b = 0;
+ ip.c = 0;
+ break;
+ }
+
+ if (copy_to_user(uparams + n, &ip, sizeof(ip)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int tee_ioctl_supp_recv(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ int rc;
+ struct tee_ioctl_buf_data buf;
+ struct tee_iocl_supp_recv_arg __user *uarg;
+ struct tee_param *params;
+ u32 num_params;
+ u32 func;
+
+ if (!ctx->teedev->desc->ops->supp_recv)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_iocl_supp_recv_arg))
+ return -EINVAL;
+
+ uarg = u64_to_user_ptr(buf.buf_ptr);
+ if (get_user(num_params, &uarg->num_params))
+ return -EFAULT;
+
+ if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) != buf.buf_len)
+ return -EINVAL;
+
+ params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ rc = params_from_user(ctx, params, num_params, uarg->params);
+ if (rc)
+ goto out;
+
+ rc = ctx->teedev->desc->ops->supp_recv(ctx, &func, &num_params, params);
+ if (rc)
+ goto out;
+
+ if (put_user(func, &uarg->func) ||
+ put_user(num_params, &uarg->num_params)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ rc = params_to_supp(ctx, uarg->params, num_params, params);
+out:
+ kfree(params);
+ return rc;
+}
+
+static int params_from_supp(struct tee_param *params, size_t num_params,
+ struct tee_ioctl_param __user *uparams)
+{
+ size_t n;
+
+ for (n = 0; n < num_params; n++) {
+ struct tee_param *p = params + n;
+ struct tee_ioctl_param ip;
+
+ if (copy_from_user(&ip, uparams + n, sizeof(ip)))
+ return -EFAULT;
+
+ /* All unused attribute bits has to be zero */
+ if (ip.attr & ~TEE_IOCTL_PARAM_ATTR_MASK)
+ return -EINVAL;
+
+ p->attr = ip.attr;
+ switch (ip.attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT:
+ /* Only out and in/out values can be updated */
+ p->u.value.a = ip.a;
+ p->u.value.b = ip.b;
+ p->u.value.c = ip.c;
+ break;
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ /*
+ * Only the size of the memref can be updated.
+ * Since we don't have access to the original
+ * parameters here, only store the supplied size.
+ * The driver will copy the updated size into the
+ * original parameters.
+ */
+ p->u.memref.shm = NULL;
+ p->u.memref.shm_offs = 0;
+ p->u.memref.size = ip.b;
+ break;
+ default:
+ memset(&p->u, 0, sizeof(p->u));
+ break;
+ }
+ }
+ return 0;
+}
+
+static int tee_ioctl_supp_send(struct tee_context *ctx,
+ struct tee_ioctl_buf_data __user *ubuf)
+{
+ long rc;
+ struct tee_ioctl_buf_data buf;
+ struct tee_iocl_supp_send_arg __user *uarg;
+ struct tee_param *params;
+ u32 num_params;
+ u32 ret;
+
+ /* Not valid for this driver */
+ if (!ctx->teedev->desc->ops->supp_send)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, sizeof(buf)))
+ return -EFAULT;
+
+ if (buf.buf_len > TEE_MAX_ARG_SIZE ||
+ buf.buf_len < sizeof(struct tee_iocl_supp_send_arg))
+ return -EINVAL;
+
+ uarg = u64_to_user_ptr(buf.buf_ptr);
+ if (get_user(ret, &uarg->ret) ||
+ get_user(num_params, &uarg->num_params))
+ return -EFAULT;
+
+ if (sizeof(*uarg) + TEE_IOCTL_PARAM_SIZE(num_params) > buf.buf_len)
+ return -EINVAL;
+
+ params = kcalloc(num_params, sizeof(struct tee_param), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ rc = params_from_supp(params, num_params, uarg->params);
+ if (rc)
+ goto out;
+
+ rc = ctx->teedev->desc->ops->supp_send(ctx, ret, num_params, params);
+out:
+ kfree(params);
+ return rc;
+}
+
+static long tee_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct tee_context *ctx = filp->private_data;
+ void __user *uarg = (void __user *)arg;
+
+ switch (cmd) {
+ case TEE_IOC_VERSION:
+ return tee_ioctl_version(ctx, uarg);
+ case TEE_IOC_SHM_ALLOC:
+ return tee_ioctl_shm_alloc(ctx, uarg);
+ case TEE_IOC_SHM_REGISTER:
+ return tee_ioctl_shm_register(ctx, uarg);
+ case TEE_IOC_OPEN_SESSION:
+ return tee_ioctl_open_session(ctx, uarg);
+ case TEE_IOC_INVOKE:
+ return tee_ioctl_invoke(ctx, uarg);
+ case TEE_IOC_CANCEL:
+ return tee_ioctl_cancel(ctx, uarg);
+ case TEE_IOC_CLOSE_SESSION:
+ return tee_ioctl_close_session(ctx, uarg);
+ case TEE_IOC_SUPPL_RECV:
+ return tee_ioctl_supp_recv(ctx, uarg);
+ case TEE_IOC_SUPPL_SEND:
+ return tee_ioctl_supp_send(ctx, uarg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct file_operations tee_fops = {
+ .owner = THIS_MODULE,
+ .open = tee_open,
+ .release = tee_release,
+ .unlocked_ioctl = tee_ioctl,
+ .compat_ioctl = tee_ioctl,
+};
+
+static void tee_release_device(struct device *dev)
+{
+ struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+
+ spin_lock(&driver_lock);
+ clear_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+ mutex_destroy(&teedev->mutex);
+ idr_destroy(&teedev->idr);
+ kfree(teedev);
+}
+
+/**
+ * tee_device_alloc() - Allocate a new struct tee_device instance
+ * @teedesc: Descriptor for this driver
+ * @dev: Parent device for this device
+ * @pool: Shared memory pool, NULL if not used
+ * @driver_data: Private driver data for this device
+ *
+ * Allocates a new struct tee_device instance. The device is
+ * removed by tee_device_unregister().
+ *
+ * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ */
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+ struct device *dev,
+ struct tee_shm_pool *pool,
+ void *driver_data)
+{
+ struct tee_device *teedev;
+ void *ret;
+ int rc;
+ int offs = 0;
+
+ if (!teedesc || !teedesc->name || !teedesc->ops ||
+ !teedesc->ops->get_version || !teedesc->ops->open ||
+ !teedesc->ops->release || !pool)
+ return ERR_PTR(-EINVAL);
+
+ teedev = kzalloc(sizeof(*teedev), GFP_KERNEL);
+ if (!teedev) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ if (teedesc->flags & TEE_DESC_PRIVILEGED)
+ offs = TEE_NUM_DEVICES / 2;
+
+ spin_lock(&driver_lock);
+ teedev->id = find_next_zero_bit(dev_mask, TEE_NUM_DEVICES, offs);
+ if (teedev->id < TEE_NUM_DEVICES)
+ set_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+
+ if (teedev->id >= TEE_NUM_DEVICES) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ snprintf(teedev->name, sizeof(teedev->name), "tee%s%d",
+ teedesc->flags & TEE_DESC_PRIVILEGED ? "priv" : "",
+ teedev->id - offs);
+
+ teedev->dev.class = tee_class;
+ teedev->dev.release = tee_release_device;
+ teedev->dev.parent = dev;
+
+ teedev->dev.devt = MKDEV(MAJOR(tee_devt), teedev->id);
+
+ rc = dev_set_name(&teedev->dev, "%s", teedev->name);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ goto err_devt;
+ }
+
+ cdev_init(&teedev->cdev, &tee_fops);
+ teedev->cdev.owner = teedesc->owner;
+ teedev->cdev.kobj.parent = &teedev->dev.kobj;
+
+ dev_set_drvdata(&teedev->dev, driver_data);
+ device_initialize(&teedev->dev);
+
+ /* 1 as tee_device_unregister() does one final tee_device_put() */
+ teedev->num_users = 1;
+ init_completion(&teedev->c_no_users);
+ mutex_init(&teedev->mutex);
+ idr_init(&teedev->idr);
+
+ teedev->desc = teedesc;
+ teedev->pool = pool;
+
+ return teedev;
+err_devt:
+ unregister_chrdev_region(teedev->dev.devt, 1);
+err:
+ pr_err("could not register %s driver\n",
+ teedesc->flags & TEE_DESC_PRIVILEGED ? "privileged" : "client");
+ if (teedev && teedev->id < TEE_NUM_DEVICES) {
+ spin_lock(&driver_lock);
+ clear_bit(teedev->id, dev_mask);
+ spin_unlock(&driver_lock);
+ }
+ kfree(teedev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tee_device_alloc);
+
+static ssize_t implementation_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct tee_device *teedev = container_of(dev, struct tee_device, dev);
+ struct tee_ioctl_version_data vers;
+
+ teedev->desc->ops->get_version(teedev, &vers);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", vers.impl_id);
+}
+static DEVICE_ATTR_RO(implementation_id);
+
+static struct attribute *tee_dev_attrs[] = {
+ &dev_attr_implementation_id.attr,
+ NULL
+};
+
+static const struct attribute_group tee_dev_group = {
+ .attrs = tee_dev_attrs,
+};
+
+/**
+ * tee_device_register() - Registers a TEE device
+ * @teedev: Device to register
+ *
+ * tee_device_unregister() need to be called to remove the @teedev if
+ * this function fails.
+ *
+ * @returns < 0 on failure
+ */
+int tee_device_register(struct tee_device *teedev)
+{
+ int rc;
+
+ if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+ dev_err(&teedev->dev, "attempt to register twice\n");
+ return -EINVAL;
+ }
+
+ rc = cdev_add(&teedev->cdev, teedev->dev.devt, 1);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "unable to cdev_add() %s, major %d, minor %d, err=%d\n",
+ teedev->name, MAJOR(teedev->dev.devt),
+ MINOR(teedev->dev.devt), rc);
+ return rc;
+ }
+
+ rc = device_add(&teedev->dev);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "unable to device_add() %s, major %d, minor %d, err=%d\n",
+ teedev->name, MAJOR(teedev->dev.devt),
+ MINOR(teedev->dev.devt), rc);
+ goto err_device_add;
+ }
+
+ rc = sysfs_create_group(&teedev->dev.kobj, &tee_dev_group);
+ if (rc) {
+ dev_err(&teedev->dev,
+ "failed to create sysfs attributes, err=%d\n", rc);
+ goto err_sysfs_create_group;
+ }
+
+ teedev->flags |= TEE_DEVICE_FLAG_REGISTERED;
+ return 0;
+
+err_sysfs_create_group:
+ device_del(&teedev->dev);
+err_device_add:
+ cdev_del(&teedev->cdev);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(tee_device_register);
+
+void tee_device_put(struct tee_device *teedev)
+{
+ mutex_lock(&teedev->mutex);
+ /* Shouldn't put in this state */
+ if (!WARN_ON(!teedev->desc)) {
+ teedev->num_users--;
+ if (!teedev->num_users) {
+ teedev->desc = NULL;
+ complete(&teedev->c_no_users);
+ }
+ }
+ mutex_unlock(&teedev->mutex);
+}
+
+bool tee_device_get(struct tee_device *teedev)
+{
+ mutex_lock(&teedev->mutex);
+ if (!teedev->desc) {
+ mutex_unlock(&teedev->mutex);
+ return false;
+ }
+ teedev->num_users++;
+ mutex_unlock(&teedev->mutex);
+ return true;
+}
+
+/**
+ * tee_device_unregister() - Removes a TEE device
+ * @teedev: Device to unregister
+ *
+ * This function should be called to remove the @teedev even if
+ * tee_device_register() hasn't been called yet. Does nothing if
+ * @teedev is NULL.
+ */
+void tee_device_unregister(struct tee_device *teedev)
+{
+ if (!teedev)
+ return;
+
+ if (teedev->flags & TEE_DEVICE_FLAG_REGISTERED) {
+ sysfs_remove_group(&teedev->dev.kobj, &tee_dev_group);
+ cdev_del(&teedev->cdev);
+ device_del(&teedev->dev);
+ }
+
+ tee_device_put(teedev);
+ wait_for_completion(&teedev->c_no_users);
+
+ /*
+ * No need to take a mutex any longer now since teedev->desc was
+ * set to NULL before teedev->c_no_users was completed.
+ */
+
+ teedev->pool = NULL;
+
+ put_device(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_device_unregister);
+
+/**
+ * tee_get_drvdata() - Return driver_data pointer
+ * @teedev: Device containing the driver_data pointer
+ * @returns the driver_data pointer supplied to tee_register().
+ */
+void *tee_get_drvdata(struct tee_device *teedev)
+{
+ return dev_get_drvdata(&teedev->dev);
+}
+EXPORT_SYMBOL_GPL(tee_get_drvdata);
+
+static int __init tee_init(void)
+{
+ int rc;
+
+ tee_class = class_create(THIS_MODULE, "tee");
+ if (IS_ERR(tee_class)) {
+ pr_err("couldn't create class\n");
+ return PTR_ERR(tee_class);
+ }
+
+ rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee");
+ if (rc) {
+ pr_err("failed to allocate char dev region\n");
+ class_destroy(tee_class);
+ tee_class = NULL;
+ }
+
+ return rc;
+}
+
+static void __exit tee_exit(void)
+{
+ class_destroy(tee_class);
+ tee_class = NULL;
+ unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
+}
+
+subsys_initcall(tee_init);
+module_exit(tee_exit);
+
+MODULE_AUTHOR("Linaro");
+MODULE_DESCRIPTION("TEE Driver");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tee/tee_private.h b/drivers/tee/tee_private.h
new file mode 100644
index 0000000..85d99d6
--- /dev/null
+++ b/drivers/tee/tee_private.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef TEE_PRIVATE_H
+#define TEE_PRIVATE_H
+
+#include <linux/cdev.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * struct tee_shm_pool - shared memory pool
+ * @private_mgr: pool manager for shared memory only between kernel
+ * and secure world
+ * @dma_buf_mgr: pool manager for shared memory exported to user space
+ */
+struct tee_shm_pool {
+ struct tee_shm_pool_mgr *private_mgr;
+ struct tee_shm_pool_mgr *dma_buf_mgr;
+};
+
+#define TEE_DEVICE_FLAG_REGISTERED 0x1
+#define TEE_MAX_DEV_NAME_LEN 32
+
+/**
+ * struct tee_device - TEE Device representation
+ * @name: name of device
+ * @desc: description of device
+ * @id: unique id of device
+ * @flags: represented by TEE_DEVICE_FLAG_REGISTERED above
+ * @dev: embedded basic device structure
+ * @cdev: embedded cdev
+ * @num_users: number of active users of this device
+ * @c_no_user: completion used when unregistering the device
+ * @mutex: mutex protecting @num_users and @idr
+ * @idr: register of shared memory object allocated on this device
+ * @pool: shared memory pool
+ */
+struct tee_device {
+ char name[TEE_MAX_DEV_NAME_LEN];
+ const struct tee_desc *desc;
+ int id;
+ unsigned int flags;
+
+ struct device dev;
+ struct cdev cdev;
+
+ size_t num_users;
+ struct completion c_no_users;
+ struct mutex mutex; /* protects num_users and idr */
+
+ struct idr idr;
+ struct tee_shm_pool *pool;
+};
+
+int tee_shm_init(void);
+
+int tee_shm_get_fd(struct tee_shm *shm);
+
+bool tee_device_get(struct tee_device *teedev);
+void tee_device_put(struct tee_device *teedev);
+
+void teedev_ctx_get(struct tee_context *ctx);
+void teedev_ctx_put(struct tee_context *ctx);
+
+#endif /*TEE_PRIVATE_H*/
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
new file mode 100644
index 0000000..ed2d71c
--- /dev/null
+++ b/drivers/tee/tee_shm.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/fdtable.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static void tee_shm_release(struct tee_shm *shm)
+{
+ struct tee_device *teedev = shm->teedev;
+
+ mutex_lock(&teedev->mutex);
+ idr_remove(&teedev->idr, shm->id);
+ if (shm->ctx)
+ list_del(&shm->link);
+ mutex_unlock(&teedev->mutex);
+
+ if (shm->flags & TEE_SHM_POOL) {
+ struct tee_shm_pool_mgr *poolm;
+
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ poolm = teedev->pool->dma_buf_mgr;
+ else
+ poolm = teedev->pool->private_mgr;
+
+ poolm->ops->free(poolm, shm);
+ } else if (shm->flags & TEE_SHM_REGISTER) {
+ size_t n;
+ int rc = teedev->desc->ops->shm_unregister(shm->ctx, shm);
+
+ if (rc)
+ dev_err(teedev->dev.parent,
+ "unregister shm %p failed: %d", shm, rc);
+
+ for (n = 0; n < shm->num_pages; n++)
+ put_page(shm->pages[n]);
+
+ kfree(shm->pages);
+ }
+
+ if (shm->ctx)
+ teedev_ctx_put(shm->ctx);
+
+ kfree(shm);
+
+ tee_device_put(teedev);
+}
+
+static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment
+ *attach, enum dma_data_direction dir)
+{
+ return NULL;
+}
+
+static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *table,
+ enum dma_data_direction dir)
+{
+}
+
+static void tee_shm_op_release(struct dma_buf *dmabuf)
+{
+ struct tee_shm *shm = dmabuf->priv;
+
+ tee_shm_release(shm);
+}
+
+static void *tee_shm_op_kmap_atomic(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+ return NULL;
+}
+
+static void *tee_shm_op_kmap(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+ return NULL;
+}
+
+static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+ struct tee_shm *shm = dmabuf->priv;
+ size_t size = vma->vm_end - vma->vm_start;
+
+ /* Refuse sharing shared memory provided by application */
+ if (shm->flags & TEE_SHM_REGISTER)
+ return -EINVAL;
+
+ return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT,
+ size, vma->vm_page_prot);
+}
+
+static const struct dma_buf_ops tee_shm_dma_buf_ops = {
+ .map_dma_buf = tee_shm_op_map_dma_buf,
+ .unmap_dma_buf = tee_shm_op_unmap_dma_buf,
+ .release = tee_shm_op_release,
+ .kmap_atomic = tee_shm_op_kmap_atomic,
+ .kmap = tee_shm_op_kmap,
+ .mmap = tee_shm_op_mmap,
+};
+
+static struct tee_shm *__tee_shm_alloc(struct tee_context *ctx,
+ struct tee_device *teedev,
+ size_t size, u32 flags)
+{
+ struct tee_shm_pool_mgr *poolm = NULL;
+ struct tee_shm *shm;
+ void *ret;
+ int rc;
+
+ if (ctx && ctx->teedev != teedev) {
+ dev_err(teedev->dev.parent, "ctx and teedev mismatch\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!(flags & TEE_SHM_MAPPED)) {
+ dev_err(teedev->dev.parent,
+ "only mapped allocations supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if ((flags & ~(TEE_SHM_MAPPED | TEE_SHM_DMA_BUF))) {
+ dev_err(teedev->dev.parent, "invalid shm flags 0x%x", flags);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!tee_device_get(teedev))
+ return ERR_PTR(-EINVAL);
+
+ if (!teedev->pool) {
+ /* teedev has been detached from driver */
+ ret = ERR_PTR(-EINVAL);
+ goto err_dev_put;
+ }
+
+ shm = kzalloc(sizeof(*shm), GFP_KERNEL);
+ if (!shm) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_dev_put;
+ }
+
+ shm->flags = flags | TEE_SHM_POOL;
+ shm->teedev = teedev;
+ shm->ctx = ctx;
+ if (flags & TEE_SHM_DMA_BUF)
+ poolm = teedev->pool->dma_buf_mgr;
+ else
+ poolm = teedev->pool->private_mgr;
+
+ rc = poolm->ops->alloc(poolm, shm, size);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ goto err_kfree;
+ }
+
+ mutex_lock(&teedev->mutex);
+ shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+ mutex_unlock(&teedev->mutex);
+ if (shm->id < 0) {
+ ret = ERR_PTR(shm->id);
+ goto err_pool_free;
+ }
+
+ if (flags & TEE_SHM_DMA_BUF) {
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ exp_info.ops = &tee_shm_dma_buf_ops;
+ exp_info.size = shm->size;
+ exp_info.flags = O_RDWR;
+ exp_info.priv = shm;
+
+ shm->dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(shm->dmabuf)) {
+ ret = ERR_CAST(shm->dmabuf);
+ goto err_rem;
+ }
+ }
+
+ if (ctx) {
+ teedev_ctx_get(ctx);
+ mutex_lock(&teedev->mutex);
+ list_add_tail(&shm->link, &ctx->list_shm);
+ mutex_unlock(&teedev->mutex);
+ }
+
+ return shm;
+err_rem:
+ mutex_lock(&teedev->mutex);
+ idr_remove(&teedev->idr, shm->id);
+ mutex_unlock(&teedev->mutex);
+err_pool_free:
+ poolm->ops->free(poolm, shm);
+err_kfree:
+ kfree(shm);
+err_dev_put:
+ tee_device_put(teedev);
+ return ret;
+}
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @ctx: Context that allocates the shared memory
+ * @size: Requested size of shared memory
+ * @flags: Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* in <linux/tee_drv.h>. TEE_SHM_MAPPED must currently always be
+ * set. If TEE_SHM_DMA_BUF global shared memory will be allocated and
+ * associated with a dma-buf handle, else driver private memory.
+ */
+struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags)
+{
+ return __tee_shm_alloc(ctx, ctx->teedev, size, flags);
+}
+EXPORT_SYMBOL_GPL(tee_shm_alloc);
+
+struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size)
+{
+ return __tee_shm_alloc(NULL, teedev, size, TEE_SHM_MAPPED);
+}
+EXPORT_SYMBOL_GPL(tee_shm_priv_alloc);
+
+struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
+ size_t length, u32 flags)
+{
+ struct tee_device *teedev = ctx->teedev;
+ const u32 req_flags = TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED;
+ struct tee_shm *shm;
+ void *ret;
+ int rc;
+ int num_pages;
+ unsigned long start;
+
+ if (flags != req_flags)
+ return ERR_PTR(-ENOTSUPP);
+
+ if (!tee_device_get(teedev))
+ return ERR_PTR(-EINVAL);
+
+ if (!teedev->desc->ops->shm_register ||
+ !teedev->desc->ops->shm_unregister) {
+ tee_device_put(teedev);
+ return ERR_PTR(-ENOTSUPP);
+ }
+
+ teedev_ctx_get(ctx);
+
+ shm = kzalloc(sizeof(*shm), GFP_KERNEL);
+ if (!shm) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ shm->flags = flags | TEE_SHM_REGISTER;
+ shm->teedev = teedev;
+ shm->ctx = ctx;
+ shm->id = -1;
+ start = rounddown(addr, PAGE_SIZE);
+ shm->offset = addr - start;
+ shm->size = length;
+ num_pages = (roundup(addr + length, PAGE_SIZE) - start) / PAGE_SIZE;
+ shm->pages = kcalloc(num_pages, sizeof(*shm->pages), GFP_KERNEL);
+ if (!shm->pages) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ rc = get_user_pages_fast(start, num_pages, 1, shm->pages);
+ if (rc > 0)
+ shm->num_pages = rc;
+ if (rc != num_pages) {
+ if (rc >= 0)
+ rc = -ENOMEM;
+ ret = ERR_PTR(rc);
+ goto err;
+ }
+
+ mutex_lock(&teedev->mutex);
+ shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL);
+ mutex_unlock(&teedev->mutex);
+
+ if (shm->id < 0) {
+ ret = ERR_PTR(shm->id);
+ goto err;
+ }
+
+ rc = teedev->desc->ops->shm_register(ctx, shm, shm->pages,
+ shm->num_pages, start);
+ if (rc) {
+ ret = ERR_PTR(rc);
+ goto err;
+ }
+
+ if (flags & TEE_SHM_DMA_BUF) {
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ exp_info.ops = &tee_shm_dma_buf_ops;
+ exp_info.size = shm->size;
+ exp_info.flags = O_RDWR;
+ exp_info.priv = shm;
+
+ shm->dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(shm->dmabuf)) {
+ ret = ERR_CAST(shm->dmabuf);
+ teedev->desc->ops->shm_unregister(ctx, shm);
+ goto err;
+ }
+ }
+
+ mutex_lock(&teedev->mutex);
+ list_add_tail(&shm->link, &ctx->list_shm);
+ mutex_unlock(&teedev->mutex);
+
+ return shm;
+err:
+ if (shm) {
+ size_t n;
+
+ if (shm->id >= 0) {
+ mutex_lock(&teedev->mutex);
+ idr_remove(&teedev->idr, shm->id);
+ mutex_unlock(&teedev->mutex);
+ }
+ if (shm->pages) {
+ for (n = 0; n < shm->num_pages; n++)
+ put_page(shm->pages[n]);
+ kfree(shm->pages);
+ }
+ }
+ kfree(shm);
+ teedev_ctx_put(ctx);
+ tee_device_put(teedev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tee_shm_register);
+
+/**
+ * tee_shm_get_fd() - Increase reference count and return file descriptor
+ * @shm: Shared memory handle
+ * @returns user space file descriptor to shared memory
+ */
+int tee_shm_get_fd(struct tee_shm *shm)
+{
+ int fd;
+
+ if (!(shm->flags & TEE_SHM_DMA_BUF))
+ return -EINVAL;
+
+ fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC);
+ if (fd >= 0)
+ get_dma_buf(shm->dmabuf);
+ return fd;
+}
+
+/**
+ * tee_shm_free() - Free shared memory
+ * @shm: Handle to shared memory to free
+ */
+void tee_shm_free(struct tee_shm *shm)
+{
+ /*
+ * dma_buf_put() decreases the dmabuf reference counter and will
+ * call tee_shm_release() when the last reference is gone.
+ *
+ * In the case of driver private memory we call tee_shm_release
+ * directly instead as it doesn't have a reference counter.
+ */
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ dma_buf_put(shm->dmabuf);
+ else
+ tee_shm_release(shm);
+}
+EXPORT_SYMBOL_GPL(tee_shm_free);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm: Shared memory handle
+ * @va: Virtual address to tranlsate
+ * @pa: Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa)
+{
+ if (!(shm->flags & TEE_SHM_MAPPED))
+ return -EINVAL;
+ /* Check that we're in the range of the shm */
+ if ((char *)va < (char *)shm->kaddr)
+ return -EINVAL;
+ if ((char *)va >= ((char *)shm->kaddr + shm->size))
+ return -EINVAL;
+
+ return tee_shm_get_pa(
+ shm, (unsigned long)va - (unsigned long)shm->kaddr, pa);
+}
+EXPORT_SYMBOL_GPL(tee_shm_va2pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm: Shared memory handle
+ * @pa: Physical address to tranlsate
+ * @va: Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va)
+{
+ if (!(shm->flags & TEE_SHM_MAPPED))
+ return -EINVAL;
+ /* Check that we're in the range of the shm */
+ if (pa < shm->paddr)
+ return -EINVAL;
+ if (pa >= (shm->paddr + shm->size))
+ return -EINVAL;
+
+ if (va) {
+ void *v = tee_shm_get_va(shm, pa - shm->paddr);
+
+ if (IS_ERR(v))
+ return PTR_ERR(v);
+ *va = v;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pa2va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ * the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs)
+{
+ if (!(shm->flags & TEE_SHM_MAPPED))
+ return ERR_PTR(-EINVAL);
+ if (offs >= shm->size)
+ return ERR_PTR(-EINVAL);
+ return (char *)shm->kaddr + offs;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_va);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @pa: Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ * error code.
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa)
+{
+ if (offs >= shm->size)
+ return -EINVAL;
+ if (pa)
+ *pa = shm->paddr + offs;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_pa);
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference
+ * count
+ * @ctx: Context owning the shared memory
+ * @id: Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id)
+{
+ struct tee_device *teedev;
+ struct tee_shm *shm;
+
+ if (!ctx)
+ return ERR_PTR(-EINVAL);
+
+ teedev = ctx->teedev;
+ mutex_lock(&teedev->mutex);
+ shm = idr_find(&teedev->idr, id);
+ if (!shm || shm->ctx != ctx)
+ shm = ERR_PTR(-EINVAL);
+ else if (shm->flags & TEE_SHM_DMA_BUF)
+ get_dma_buf(shm->dmabuf);
+ mutex_unlock(&teedev->mutex);
+ return shm;
+}
+EXPORT_SYMBOL_GPL(tee_shm_get_from_id);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm: Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm)
+{
+ if (shm->flags & TEE_SHM_DMA_BUF)
+ dma_buf_put(shm->dmabuf);
+}
+EXPORT_SYMBOL_GPL(tee_shm_put);
diff --git a/drivers/tee/tee_shm_pool.c b/drivers/tee/tee_shm_pool.c
new file mode 100644
index 0000000..e6d4b9e
--- /dev/null
+++ b/drivers/tee/tee_shm_pool.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/tee_drv.h>
+#include "tee_private.h"
+
+static int pool_op_gen_alloc(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm, size_t size)
+{
+ unsigned long va;
+ struct gen_pool *genpool = poolm->private_data;
+ size_t s = roundup(size, 1 << genpool->min_alloc_order);
+
+ va = gen_pool_alloc(genpool, s);
+ if (!va)
+ return -ENOMEM;
+
+ memset((void *)va, 0, s);
+ shm->kaddr = (void *)va;
+ shm->paddr = gen_pool_virt_to_phys(genpool, va);
+ shm->size = s;
+ return 0;
+}
+
+static void pool_op_gen_free(struct tee_shm_pool_mgr *poolm,
+ struct tee_shm *shm)
+{
+ gen_pool_free(poolm->private_data, (unsigned long)shm->kaddr,
+ shm->size);
+ shm->kaddr = NULL;
+}
+
+static void pool_op_gen_destroy_poolmgr(struct tee_shm_pool_mgr *poolm)
+{
+ gen_pool_destroy(poolm->private_data);
+ kfree(poolm);
+}
+
+static const struct tee_shm_pool_mgr_ops pool_ops_generic = {
+ .alloc = pool_op_gen_alloc,
+ .free = pool_op_gen_free,
+ .destroy_poolmgr = pool_op_gen_destroy_poolmgr,
+};
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved
+ * memory range
+ * @priv_info: Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools will must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf, others will use the range provided by @priv.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *
+tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info,
+ struct tee_shm_pool_mem_info *dmabuf_info)
+{
+ struct tee_shm_pool_mgr *priv_mgr;
+ struct tee_shm_pool_mgr *dmabuf_mgr;
+ void *rc;
+
+ /*
+ * Create the pool for driver private shared memory
+ */
+ rc = tee_shm_pool_mgr_alloc_res_mem(priv_info->vaddr, priv_info->paddr,
+ priv_info->size,
+ 3 /* 8 byte aligned */);
+ if (IS_ERR(rc))
+ return rc;
+ priv_mgr = rc;
+
+ /*
+ * Create the pool for dma_buf shared memory
+ */
+ rc = tee_shm_pool_mgr_alloc_res_mem(dmabuf_info->vaddr,
+ dmabuf_info->paddr,
+ dmabuf_info->size, PAGE_SHIFT);
+ if (IS_ERR(rc))
+ goto err_free_priv_mgr;
+ dmabuf_mgr = rc;
+
+ rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);
+ if (IS_ERR(rc))
+ goto err_free_dmabuf_mgr;
+
+ return rc;
+
+err_free_dmabuf_mgr:
+ tee_shm_pool_mgr_destroy(dmabuf_mgr);
+err_free_priv_mgr:
+ tee_shm_pool_mgr_destroy(priv_mgr);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_alloc_res_mem);
+
+struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr,
+ phys_addr_t paddr,
+ size_t size,
+ int min_alloc_order)
+{
+ const size_t page_mask = PAGE_SIZE - 1;
+ struct tee_shm_pool_mgr *mgr;
+ int rc;
+
+ /* Start and end must be page aligned */
+ if (vaddr & page_mask || paddr & page_mask || size & page_mask)
+ return ERR_PTR(-EINVAL);
+
+ mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+ if (!mgr)
+ return ERR_PTR(-ENOMEM);
+
+ mgr->private_data = gen_pool_create(min_alloc_order, -1);
+ if (!mgr->private_data) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ gen_pool_set_algo(mgr->private_data, gen_pool_best_fit, NULL);
+ rc = gen_pool_add_virt(mgr->private_data, vaddr, paddr, size, -1);
+ if (rc) {
+ gen_pool_destroy(mgr->private_data);
+ goto err;
+ }
+
+ mgr->ops = &pool_ops_generic;
+
+ return mgr;
+err:
+ kfree(mgr);
+
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_mgr_alloc_res_mem);
+
+static bool check_mgr_ops(struct tee_shm_pool_mgr *mgr)
+{
+ return mgr && mgr->ops && mgr->ops->alloc && mgr->ops->free &&
+ mgr->ops->destroy_poolmgr;
+}
+
+struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr,
+ struct tee_shm_pool_mgr *dmabuf_mgr)
+{
+ struct tee_shm_pool *pool;
+
+ if (!check_mgr_ops(priv_mgr) || !check_mgr_ops(dmabuf_mgr))
+ return ERR_PTR(-EINVAL);
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+
+ pool->private_mgr = priv_mgr;
+ pool->dma_buf_mgr = dmabuf_mgr;
+
+ return pool;
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_alloc);
+
+/**
+ * tee_shm_pool_free() - Free a shared memory pool
+ * @pool: The shared memory pool to free
+ *
+ * There must be no remaining shared memory allocated from this pool when
+ * this function is called.
+ */
+void tee_shm_pool_free(struct tee_shm_pool *pool)
+{
+ if (pool->private_mgr)
+ tee_shm_pool_mgr_destroy(pool->private_mgr);
+ if (pool->dma_buf_mgr)
+ tee_shm_pool_mgr_destroy(pool->dma_buf_mgr);
+ kfree(pool);
+}
+EXPORT_SYMBOL_GPL(tee_shm_pool_free);
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index f642966..2d855a9 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -23,222 +23,450 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/io.h>
+#include <linux/of_device.h>
#include "thermal_core.h"
-#define TEMP0_TH (0x4)
-#define TEMP0_RST_TH (0x8)
-#define TEMP0_CFG (0xC)
-#define TEMP0_EN (0x10)
-#define TEMP0_INT_EN (0x14)
-#define TEMP0_INT_CLR (0x18)
-#define TEMP0_RST_MSK (0x1C)
-#define TEMP0_VALUE (0x28)
+#define HI6220_TEMP0_LAG (0x0)
+#define HI6220_TEMP0_TH (0x4)
+#define HI6220_TEMP0_RST_TH (0x8)
+#define HI6220_TEMP0_CFG (0xC)
+#define HI6220_TEMP0_CFG_SS_MSK (0xF000)
+#define HI6220_TEMP0_CFG_HDAK_MSK (0x30)
+#define HI6220_TEMP0_EN (0x10)
+#define HI6220_TEMP0_INT_EN (0x14)
+#define HI6220_TEMP0_INT_CLR (0x18)
+#define HI6220_TEMP0_RST_MSK (0x1C)
+#define HI6220_TEMP0_VALUE (0x28)
-#define HISI_TEMP_BASE (-60)
-#define HISI_TEMP_RESET (100000)
+#define HI3660_OFFSET(chan) ((chan) * 0x40)
+#define HI3660_TEMP(chan) (HI3660_OFFSET(chan) + 0x1C)
+#define HI3660_TH(chan) (HI3660_OFFSET(chan) + 0x20)
+#define HI3660_LAG(chan) (HI3660_OFFSET(chan) + 0x28)
+#define HI3660_INT_EN(chan) (HI3660_OFFSET(chan) + 0x2C)
+#define HI3660_INT_CLR(chan) (HI3660_OFFSET(chan) + 0x30)
-#define HISI_MAX_SENSORS 4
+#define HI6220_TEMP_BASE (-60000)
+#define HI6220_TEMP_RESET (100000)
+#define HI6220_TEMP_STEP (785)
+#define HI6220_TEMP_LAG (3500)
+
+#define HI3660_TEMP_BASE (-63780)
+#define HI3660_TEMP_STEP (205)
+#define HI3660_TEMP_LAG (4000)
+
+#define HI6220_DEFAULT_SENSOR 2
+#define HI3660_DEFAULT_SENSOR 1
struct hisi_thermal_sensor {
- struct hisi_thermal_data *thermal;
struct thermal_zone_device *tzd;
-
- long sensor_temp;
uint32_t id;
uint32_t thres_temp;
};
struct hisi_thermal_data {
- struct mutex thermal_lock; /* protects register data */
+ int (*get_temp)(struct hisi_thermal_data *data);
+ int (*enable_sensor)(struct hisi_thermal_data *data);
+ int (*disable_sensor)(struct hisi_thermal_data *data);
+ int (*irq_handler)(struct hisi_thermal_data *data);
struct platform_device *pdev;
struct clk *clk;
- struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS];
-
- int irq, irq_bind_sensor;
- bool irq_enabled;
-
+ struct hisi_thermal_sensor sensor;
void __iomem *regs;
+ int irq;
};
-/* in millicelsius */
-static inline int _step_to_temp(int step)
+/*
+ * The temperature computation on the tsensor is as follow:
+ * Unit: millidegree Celsius
+ * Step: 200/255 (0.7843)
+ * Temperature base: -60°C
+ *
+ * The register is programmed in temperature steps, every step is 785
+ * millidegree and begins at -60 000 m°C
+ *
+ * The temperature from the steps:
+ *
+ * Temp = TempBase + (steps x 785)
+ *
+ * and the steps from the temperature:
+ *
+ * steps = (Temp - TempBase) / 785
+ *
+ */
+static inline int hi6220_thermal_step_to_temp(int step)
{
- /*
- * Every step equals (1 * 200) / 255 celsius, and finally
- * need convert to millicelsius.
- */
- return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255));
+ return HI6220_TEMP_BASE + (step * HI6220_TEMP_STEP);
}
-static inline long _temp_to_step(long temp)
+static inline int hi6220_thermal_temp_to_step(int temp)
{
- return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000;
+ return DIV_ROUND_UP(temp - HI6220_TEMP_BASE, HI6220_TEMP_STEP);
}
-static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
- struct hisi_thermal_sensor *sensor)
+/*
+ * for Hi3660,
+ * Step: 189/922 (0.205)
+ * Temperature base: -63.780°C
+ *
+ * The register is programmed in temperature steps, every step is 205
+ * millidegree and begins at -63 780 m°C
+ */
+static inline int hi3660_thermal_step_to_temp(int step)
{
- long val;
-
- mutex_lock(&data->thermal_lock);
-
- /* disable interrupt */
- writel(0x0, data->regs + TEMP0_INT_EN);
- writel(0x1, data->regs + TEMP0_INT_CLR);
-
- /* disable module firstly */
- writel(0x0, data->regs + TEMP0_EN);
-
- /* select sensor id */
- writel((sensor->id << 12), data->regs + TEMP0_CFG);
-
- /* enable module */
- writel(0x1, data->regs + TEMP0_EN);
-
- usleep_range(3000, 5000);
-
- val = readl(data->regs + TEMP0_VALUE);
- val = _step_to_temp(val);
-
- mutex_unlock(&data->thermal_lock);
-
- return val;
+ return HI3660_TEMP_BASE + step * HI3660_TEMP_STEP;
}
-static void hisi_thermal_enable_bind_irq_sensor
- (struct hisi_thermal_data *data)
+static inline int hi3660_thermal_temp_to_step(int temp)
{
- struct hisi_thermal_sensor *sensor;
-
- mutex_lock(&data->thermal_lock);
-
- sensor = &data->sensors[data->irq_bind_sensor];
-
- /* setting the hdak time */
- writel(0x0, data->regs + TEMP0_CFG);
-
- /* disable module firstly */
- writel(0x0, data->regs + TEMP0_RST_MSK);
- writel(0x0, data->regs + TEMP0_EN);
-
- /* select sensor id */
- writel((sensor->id << 12), data->regs + TEMP0_CFG);
-
- /* enable for interrupt */
- writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,
- data->regs + TEMP0_TH);
-
- writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH);
-
- /* enable module */
- writel(0x1, data->regs + TEMP0_RST_MSK);
- writel(0x1, data->regs + TEMP0_EN);
-
- writel(0x0, data->regs + TEMP0_INT_CLR);
- writel(0x1, data->regs + TEMP0_INT_EN);
-
- usleep_range(3000, 5000);
-
- mutex_unlock(&data->thermal_lock);
+ return DIV_ROUND_UP(temp - HI3660_TEMP_BASE, HI3660_TEMP_STEP);
}
-static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data)
+/*
+ * The lag register contains 5 bits encoding the temperature in steps.
+ *
+ * Each time the temperature crosses the threshold boundary, an
+ * interrupt is raised. It could be when the temperature is going
+ * above the threshold or below. However, if the temperature is
+ * fluctuating around this value due to the load, we can receive
+ * several interrupts which may not desired.
+ *
+ * We can setup a temperature representing the delta between the
+ * threshold and the current temperature when the temperature is
+ * decreasing.
+ *
+ * For instance: the lag register is 5°C, the threshold is 65°C, when
+ * the temperature reaches 65°C an interrupt is raised and when the
+ * temperature decrease to 65°C - 5°C another interrupt is raised.
+ *
+ * A very short lag can lead to an interrupt storm, a long lag
+ * increase the latency to react to the temperature changes. In our
+ * case, that is not really a problem as we are polling the
+ * temperature.
+ *
+ * [0:4] : lag register
+ *
+ * The temperature is coded in steps, cf. HI6220_TEMP_STEP.
+ *
+ * Min : 0x00 : 0.0 °C
+ * Max : 0x1F : 24.3 °C
+ *
+ * The 'value' parameter is in milliCelsius.
+ */
+static inline void hi6220_thermal_set_lag(void __iomem *addr, int value)
{
- mutex_lock(&data->thermal_lock);
+ writel(DIV_ROUND_UP(value, HI6220_TEMP_STEP) & 0x1F,
+ addr + HI6220_TEMP0_LAG);
+}
+static inline void hi6220_thermal_alarm_clear(void __iomem *addr, int value)
+{
+ writel(value, addr + HI6220_TEMP0_INT_CLR);
+}
+
+static inline void hi6220_thermal_alarm_enable(void __iomem *addr, int value)
+{
+ writel(value, addr + HI6220_TEMP0_INT_EN);
+}
+
+static inline void hi6220_thermal_alarm_set(void __iomem *addr, int temp)
+{
+ writel(hi6220_thermal_temp_to_step(temp) | 0x0FFFFFF00,
+ addr + HI6220_TEMP0_TH);
+}
+
+static inline void hi6220_thermal_reset_set(void __iomem *addr, int temp)
+{
+ writel(hi6220_thermal_temp_to_step(temp), addr + HI6220_TEMP0_RST_TH);
+}
+
+static inline void hi6220_thermal_reset_enable(void __iomem *addr, int value)
+{
+ writel(value, addr + HI6220_TEMP0_RST_MSK);
+}
+
+static inline void hi6220_thermal_enable(void __iomem *addr, int value)
+{
+ writel(value, addr + HI6220_TEMP0_EN);
+}
+
+static inline int hi6220_thermal_get_temperature(void __iomem *addr)
+{
+ return hi6220_thermal_step_to_temp(readl(addr + HI6220_TEMP0_VALUE));
+}
+
+/*
+ * [0:6] lag register
+ *
+ * The temperature is coded in steps, cf. HI3660_TEMP_STEP.
+ *
+ * Min : 0x00 : 0.0 °C
+ * Max : 0x7F : 26.0 °C
+ *
+ */
+static inline void hi3660_thermal_set_lag(void __iomem *addr,
+ int id, int value)
+{
+ writel(DIV_ROUND_UP(value, HI3660_TEMP_STEP) & 0x7F,
+ addr + HI3660_LAG(id));
+}
+
+static inline void hi3660_thermal_alarm_clear(void __iomem *addr,
+ int id, int value)
+{
+ writel(value, addr + HI3660_INT_CLR(id));
+}
+
+static inline void hi3660_thermal_alarm_enable(void __iomem *addr,
+ int id, int value)
+{
+ writel(value, addr + HI3660_INT_EN(id));
+}
+
+static inline void hi3660_thermal_alarm_set(void __iomem *addr,
+ int id, int value)
+{
+ writel(value, addr + HI3660_TH(id));
+}
+
+static inline int hi3660_thermal_get_temperature(void __iomem *addr, int id)
+{
+ return hi3660_thermal_step_to_temp(readl(addr + HI3660_TEMP(id)));
+}
+
+/*
+ * Temperature configuration register - Sensor selection
+ *
+ * Bits [19:12]
+ *
+ * 0x0: local sensor (default)
+ * 0x1: remote sensor 1 (ACPU cluster 1)
+ * 0x2: remote sensor 2 (ACPU cluster 0)
+ * 0x3: remote sensor 3 (G3D)
+ */
+static inline void hi6220_thermal_sensor_select(void __iomem *addr, int sensor)
+{
+ writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_SS_MSK) |
+ (sensor << 12), addr + HI6220_TEMP0_CFG);
+}
+
+/*
+ * Temperature configuration register - Hdak conversion polling interval
+ *
+ * Bits [5:4]
+ *
+ * 0x0 : 0.768 ms
+ * 0x1 : 6.144 ms
+ * 0x2 : 49.152 ms
+ * 0x3 : 393.216 ms
+ */
+static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value)
+{
+ writel((readl(addr + HI6220_TEMP0_CFG) & ~HI6220_TEMP0_CFG_HDAK_MSK) |
+ (value << 4), addr + HI6220_TEMP0_CFG);
+}
+
+static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data)
+{
+ hi6220_thermal_alarm_clear(data->regs, 1);
+ return 0;
+}
+
+static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data)
+{
+ hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1);
+ return 0;
+}
+
+static int hi6220_thermal_get_temp(struct hisi_thermal_data *data)
+{
+ return hi6220_thermal_get_temperature(data->regs);
+}
+
+static int hi3660_thermal_get_temp(struct hisi_thermal_data *data)
+{
+ return hi3660_thermal_get_temperature(data->regs, data->sensor.id);
+}
+
+static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
+{
/* disable sensor module */
- writel(0x0, data->regs + TEMP0_INT_EN);
- writel(0x0, data->regs + TEMP0_RST_MSK);
- writel(0x0, data->regs + TEMP0_EN);
+ hi6220_thermal_enable(data->regs, 0);
+ hi6220_thermal_alarm_enable(data->regs, 0);
+ hi6220_thermal_reset_enable(data->regs, 0);
- mutex_unlock(&data->thermal_lock);
-}
-
-static int hisi_thermal_get_temp(void *_sensor, int *temp)
-{
- struct hisi_thermal_sensor *sensor = _sensor;
- struct hisi_thermal_data *data = sensor->thermal;
-
- int sensor_id = -1, i;
- long max_temp = 0;
-
- *temp = hisi_thermal_get_sensor_temp(data, sensor);
-
- sensor->sensor_temp = *temp;
-
- for (i = 0; i < HISI_MAX_SENSORS; i++) {
- if (!data->sensors[i].tzd)
- continue;
-
- if (data->sensors[i].sensor_temp >= max_temp) {
- max_temp = data->sensors[i].sensor_temp;
- sensor_id = i;
- }
- }
-
- /* If no sensor has been enabled, then skip to enable irq */
- if (sensor_id == -1)
- return 0;
-
- mutex_lock(&data->thermal_lock);
- data->irq_bind_sensor = sensor_id;
- mutex_unlock(&data->thermal_lock);
-
- dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n",
- sensor->id, data->irq_enabled, *temp, sensor->thres_temp);
- /*
- * Bind irq to sensor for two cases:
- * Reenable alarm IRQ if temperature below threshold;
- * if irq has been enabled, always set it;
- */
- if (data->irq_enabled) {
- hisi_thermal_enable_bind_irq_sensor(data);
- return 0;
- }
-
- if (max_temp < sensor->thres_temp) {
- data->irq_enabled = true;
- hisi_thermal_enable_bind_irq_sensor(data);
- enable_irq(data->irq);
- }
+ clk_disable_unprepare(data->clk);
return 0;
}
-static struct thermal_zone_of_device_ops hisi_of_thermal_ops = {
+static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data)
+{
+ /* disable sensor module */
+ hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0);
+ return 0;
+}
+
+static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
+{
+ struct hisi_thermal_sensor *sensor = &data->sensor;
+ int ret;
+
+ /* enable clock for tsensor */
+ ret = clk_prepare_enable(data->clk);
+ if (ret)
+ return ret;
+
+ /* disable module firstly */
+ hi6220_thermal_reset_enable(data->regs, 0);
+ hi6220_thermal_enable(data->regs, 0);
+
+ /* select sensor id */
+ hi6220_thermal_sensor_select(data->regs, sensor->id);
+
+ /* setting the hdak time */
+ hi6220_thermal_hdak_set(data->regs, 0);
+
+ /* setting lag value between current temp and the threshold */
+ hi6220_thermal_set_lag(data->regs, HI6220_TEMP_LAG);
+
+ /* enable for interrupt */
+ hi6220_thermal_alarm_set(data->regs, sensor->thres_temp);
+
+ hi6220_thermal_reset_set(data->regs, HI6220_TEMP_RESET);
+
+ /* enable module */
+ hi6220_thermal_reset_enable(data->regs, 1);
+ hi6220_thermal_enable(data->regs, 1);
+
+ hi6220_thermal_alarm_clear(data->regs, 0);
+ hi6220_thermal_alarm_enable(data->regs, 1);
+
+ return 0;
+}
+
+static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data)
+{
+ unsigned int value;
+ struct hisi_thermal_sensor *sensor = &data->sensor;
+
+ /* disable interrupt */
+ hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
+
+ /* setting lag value between current temp and the threshold */
+ hi3660_thermal_set_lag(data->regs, sensor->id, HI3660_TEMP_LAG);
+
+ /* set interrupt threshold */
+ value = hi3660_thermal_temp_to_step(sensor->thres_temp);
+ hi3660_thermal_alarm_set(data->regs, sensor->id, value);
+
+ /* enable interrupt */
+ hi3660_thermal_alarm_clear(data->regs, sensor->id, 1);
+ hi3660_thermal_alarm_enable(data->regs, sensor->id, 1);
+
+ return 0;
+}
+
+static int hi6220_thermal_probe(struct hisi_thermal_data *data)
+{
+ struct platform_device *pdev = data->pdev;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int ret;
+
+ data->get_temp = hi6220_thermal_get_temp;
+ data->enable_sensor = hi6220_thermal_enable_sensor;
+ data->disable_sensor = hi6220_thermal_disable_sensor;
+ data->irq_handler = hi6220_thermal_irq_handler;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(data->regs)) {
+ dev_err(dev, "failed to get io address\n");
+ return PTR_ERR(data->regs);
+ }
+
+ data->clk = devm_clk_get(dev, "thermal_clk");
+ if (IS_ERR(data->clk)) {
+ ret = PTR_ERR(data->clk);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "failed to get thermal clk: %d\n", ret);
+ return ret;
+ }
+
+ data->irq = platform_get_irq(pdev, 0);
+ if (data->irq < 0)
+ return data->irq;
+
+ data->sensor.id = HI6220_DEFAULT_SENSOR;
+
+ return 0;
+}
+
+static int hi3660_thermal_probe(struct hisi_thermal_data *data)
+{
+ struct platform_device *pdev = data->pdev;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+
+ data->get_temp = hi3660_thermal_get_temp;
+ data->enable_sensor = hi3660_thermal_enable_sensor;
+ data->disable_sensor = hi3660_thermal_disable_sensor;
+ data->irq_handler = hi3660_thermal_irq_handler;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(data->regs)) {
+ dev_err(dev, "failed to get io address\n");
+ return PTR_ERR(data->regs);
+ }
+
+ data->irq = platform_get_irq(pdev, 0);
+ if (data->irq < 0)
+ return data->irq;
+
+ data->sensor.id = HI3660_DEFAULT_SENSOR;
+
+ return 0;
+}
+
+static int hisi_thermal_get_temp(void *__data, int *temp)
+{
+ struct hisi_thermal_data *data = __data;
+ struct hisi_thermal_sensor *sensor = &data->sensor;
+
+ *temp = data->get_temp(data);
+
+ dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
+ sensor->id, *temp, sensor->thres_temp);
+
+ return 0;
+}
+
+static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = {
.get_temp = hisi_thermal_get_temp,
};
-static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev)
-{
- struct hisi_thermal_data *data = dev;
-
- disable_irq_nosync(irq);
- data->irq_enabled = false;
-
- return IRQ_WAKE_THREAD;
-}
-
static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
{
struct hisi_thermal_data *data = dev;
- struct hisi_thermal_sensor *sensor;
- int i;
+ struct hisi_thermal_sensor *sensor = &data->sensor;
+ int temp = 0;
- mutex_lock(&data->thermal_lock);
- sensor = &data->sensors[data->irq_bind_sensor];
+ data->irq_handler(data);
- dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",
- sensor->thres_temp / 1000);
- mutex_unlock(&data->thermal_lock);
+ hisi_thermal_get_temp(data, &temp);
- for (i = 0; i < HISI_MAX_SENSORS; i++) {
- if (!data->sensors[i].tzd)
- continue;
+ if (temp >= sensor->thres_temp) {
+ dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
+ temp, sensor->thres_temp);
- thermal_zone_device_update(data->sensors[i].tzd,
+ thermal_zone_device_update(data->sensor.tzd,
THERMAL_EVENT_UNSPECIFIED);
+
+ } else {
+ dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",
+ temp, sensor->thres_temp);
}
return IRQ_HANDLED;
@@ -246,17 +474,14 @@
static int hisi_thermal_register_sensor(struct platform_device *pdev,
struct hisi_thermal_data *data,
- struct hisi_thermal_sensor *sensor,
- int index)
+ struct hisi_thermal_sensor *sensor)
{
int ret, i;
const struct thermal_trip *trip;
- sensor->id = index;
- sensor->thermal = data;
-
sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
- sensor->id, sensor, &hisi_of_thermal_ops);
+ sensor->id, data,
+ &hisi_of_thermal_ops);
if (IS_ERR(sensor->tzd)) {
ret = PTR_ERR(sensor->tzd);
sensor->tzd = NULL;
@@ -278,7 +503,14 @@
}
static const struct of_device_id of_hisi_thermal_match[] = {
- { .compatible = "hisilicon,tsensor" },
+ {
+ .compatible = "hisilicon,tsensor",
+ .data = hi6220_thermal_probe
+ },
+ {
+ .compatible = "hisilicon,hi3660-tsensor",
+ .data = hi3660_thermal_probe
+ },
{ /* end */ }
};
MODULE_DEVICE_TABLE(of, of_hisi_thermal_match);
@@ -295,88 +527,63 @@
static int hisi_thermal_probe(struct platform_device *pdev)
{
struct hisi_thermal_data *data;
- struct resource *res;
- int i;
+ int const (*platform_probe)(struct hisi_thermal_data *);
+ struct device *dev = &pdev->dev;
int ret;
- data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- mutex_init(&data->thermal_lock);
data->pdev = pdev;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- data->regs = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(data->regs)) {
- dev_err(&pdev->dev, "failed to get io address\n");
- return PTR_ERR(data->regs);
- }
-
- data->irq = platform_get_irq(pdev, 0);
- if (data->irq < 0)
- return data->irq;
-
- ret = devm_request_threaded_irq(&pdev->dev, data->irq,
- hisi_thermal_alarm_irq,
- hisi_thermal_alarm_irq_thread,
- 0, "hisi_thermal", data);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
- return ret;
- }
-
platform_set_drvdata(pdev, data);
- data->clk = devm_clk_get(&pdev->dev, "thermal_clk");
- if (IS_ERR(data->clk)) {
- ret = PTR_ERR(data->clk);
- if (ret != -EPROBE_DEFER)
- dev_err(&pdev->dev,
- "failed to get thermal clk: %d\n", ret);
- return ret;
+ platform_probe = of_device_get_match_data(dev);
+ if (!platform_probe) {
+ dev_err(dev, "failed to get probe func\n");
+ return -EINVAL;
}
- /* enable clock for thermal */
- ret = clk_prepare_enable(data->clk);
+ ret = platform_probe(data);
+ if (ret)
+ return ret;
+
+ ret = hisi_thermal_register_sensor(pdev, data,
+ &data->sensor);
if (ret) {
- dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
+ dev_err(dev, "failed to register thermal sensor: %d\n", ret);
return ret;
}
- hisi_thermal_enable_bind_irq_sensor(data);
- irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
- &data->irq_enabled);
-
- for (i = 0; i < HISI_MAX_SENSORS; ++i) {
- ret = hisi_thermal_register_sensor(pdev, data,
- &data->sensors[i], i);
- if (ret)
- dev_err(&pdev->dev,
- "failed to register thermal sensor: %d\n", ret);
- else
- hisi_thermal_toggle_sensor(&data->sensors[i], true);
+ ret = data->enable_sensor(data);
+ if (ret) {
+ dev_err(dev, "Failed to setup the sensor: %d\n", ret);
+ return ret;
}
+ if (data->irq) {
+ ret = devm_request_threaded_irq(dev, data->irq, NULL,
+ hisi_thermal_alarm_irq_thread,
+ IRQF_ONESHOT, "hisi_thermal", data);
+ if (ret < 0) {
+ dev_err(dev, "failed to request alarm irq: %d\n", ret);
+ return ret;
+ }
+ }
+
+ hisi_thermal_toggle_sensor(&data->sensor, true);
+
return 0;
}
static int hisi_thermal_remove(struct platform_device *pdev)
{
struct hisi_thermal_data *data = platform_get_drvdata(pdev);
- int i;
+ struct hisi_thermal_sensor *sensor = &data->sensor;
- for (i = 0; i < HISI_MAX_SENSORS; i++) {
- struct hisi_thermal_sensor *sensor = &data->sensors[i];
+ hisi_thermal_toggle_sensor(sensor, false);
- if (!sensor->tzd)
- continue;
-
- hisi_thermal_toggle_sensor(sensor, false);
- }
-
- hisi_thermal_disable_sensor(data);
- clk_disable_unprepare(data->clk);
+ data->disable_sensor(data);
return 0;
}
@@ -386,10 +593,7 @@
{
struct hisi_thermal_data *data = dev_get_drvdata(dev);
- hisi_thermal_disable_sensor(data);
- data->irq_enabled = false;
-
- clk_disable_unprepare(data->clk);
+ data->disable_sensor(data);
return 0;
}
@@ -398,12 +602,7 @@
{
struct hisi_thermal_data *data = dev_get_drvdata(dev);
- clk_prepare_enable(data->clk);
-
- data->irq_enabled = true;
- hisi_thermal_enable_bind_irq_sensor(data);
-
- return 0;
+ return data->enable_sensor(data);
}
#endif
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index bcef2e7..1eea63c 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -31,8 +31,7 @@
* If the temperature is higher than a trip point,
* a. if the trend is THERMAL_TREND_RAISING, use higher cooling
* state for this trip point
- * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
- * state for this trip point
+ * b. if the trend is THERMAL_TREND_DROPPING, do nothing
* c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit
* for this trip point
* d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit
@@ -94,9 +93,11 @@
if (!throttle)
next_target = THERMAL_NO_TARGET;
} else {
- next_target = cur_state - 1;
- if (next_target > instance->upper)
- next_target = instance->upper;
+ if (!throttle) {
+ next_target = cur_state - 1;
+ if (next_target > instance->upper)
+ next_target = instance->upper;
+ }
}
break;
case THERMAL_TREND_DROP_FULL:
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index bdf0e6e..faf50df 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -1764,7 +1764,7 @@
{
struct n_tty_data *ldata = tty->disc_data;
- if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+ if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
ldata->line_start = ldata->read_tail;
if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2427,7 +2427,7 @@
return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
case TIOCINQ:
down_write(&tty->termios_rwsem);
- if (L_ICANON(tty))
+ if (L_ICANON(tty) && !L_EXTPROC(tty))
retval = inq_canon(ldata);
else
retval = read_cnt(ldata);
diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c
index f8c3107..2ffebb7 100644
--- a/drivers/tty/serial/8250/8250_fintek.c
+++ b/drivers/tty/serial/8250/8250_fintek.c
@@ -121,7 +121,7 @@
if ((!!(rs485->flags & SER_RS485_RTS_ON_SEND)) ==
(!!(rs485->flags & SER_RS485_RTS_AFTER_SEND)))
- rs485->flags &= SER_RS485_ENABLED;
+ rs485->flags &= ~SER_RS485_ENABLED;
else
config |= RS485_URA;
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 22d32d2..b80ea87 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5568,6 +5568,9 @@
{ PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 },
{ PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 },
+ /* Amazon PCI serial device */
+ { PCI_DEVICE(0x1d0f, 0x8250), .driver_data = pbn_b0_1_115200 },
+
/*
* These entries match devices with class COMMUNICATION_SERIAL,
* COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 1ef31e3..f6e4373 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2526,8 +2526,11 @@
serial_dl_write(up, quot);
/* XR17V35x UARTs have an extra fractional divisor register (DLD) */
- if (up->port.type == PORT_XR17V35X)
+ if (up->port.type == PORT_XR17V35X) {
+ /* Preserve bits not related to baudrate; DLD[7:4]. */
+ quot_frac |= serial_port_in(port, 0x2) & 0xf0;
serial_port_out(port, 0x2, quot_frac);
+ }
}
static unsigned int serial8250_get_baud_rate(struct uart_port *port,
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index a70356d..521a6e4 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2239,12 +2239,14 @@
val &= ~UCR3_AWAKEN;
writel(val, sport->port.membase + UCR3);
- val = readl(sport->port.membase + UCR1);
- if (on)
- val |= UCR1_RTSDEN;
- else
- val &= ~UCR1_RTSDEN;
- writel(val, sport->port.membase + UCR1);
+ if (sport->have_rtscts) {
+ val = readl(sport->port.membase + UCR1);
+ if (on)
+ val |= UCR1_RTSDEN;
+ else
+ val &= ~UCR1_RTSDEN;
+ writel(val, sport->port.membase + UCR1);
+ }
}
static int imx_serial_port_suspend_noirq(struct device *dev)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 2e2b88a..dce39de 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -968,6 +968,8 @@
}
} else {
retval = uart_startup(tty, state, 1);
+ if (retval == 0)
+ tty_port_set_initialized(port, true);
if (retval > 0)
retval = 0;
}
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 701c085..53cbf4e 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -243,8 +243,10 @@
* architecture has no support for it:
*/
if (!trigger_all_cpu_backtrace()) {
- struct pt_regs *regs = get_irq_regs();
+ struct pt_regs *regs = NULL;
+ if (in_irq())
+ regs = get_irq_regs();
if (regs) {
pr_info("CPU%d:\n", smp_processor_id());
show_regs(regs);
@@ -263,7 +265,10 @@
static void sysrq_handle_showregs(int key)
{
- struct pt_regs *regs = get_irq_regs();
+ struct pt_regs *regs = NULL;
+
+ if (in_irq())
+ regs = get_irq_regs();
if (regs)
show_regs(regs);
perf_event_print_debug();
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index c220c2c..e99f1c5 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -446,7 +446,7 @@
* Callers other than flush_to_ldisc() need to exclude the kworker
* from concurrent use of the line discipline, see paste_selection().
*
- * Returns the number of bytes not processed
+ * Returns the number of bytes processed
*/
int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p,
char *f, int count)
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 734a635..8d9f9a8 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1543,6 +1543,9 @@
"%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n",
__func__, tty->driver->name);
+ retval = tty_ldisc_lock(tty, 5 * HZ);
+ if (retval)
+ goto err_release_lock;
tty->port->itty = tty;
/*
@@ -1553,6 +1556,7 @@
retval = tty_ldisc_setup(tty, tty->link);
if (retval)
goto err_release_tty;
+ tty_ldisc_unlock(tty);
/* Return the tty locked so that it cannot vanish under the caller */
return tty;
@@ -1565,9 +1569,11 @@
/* call the tty release_tty routine to clean out this slot */
err_release_tty:
- tty_unlock(tty);
+ tty_ldisc_unlock(tty);
tty_info_ratelimited(tty, "ldisc open failed (%d), clearing slot %d\n",
retval, idx);
+err_release_lock:
+ tty_unlock(tty);
release_tty(tty, idx);
return ERR_PTR(retval);
}
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 68947f6..3a9e2a2 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -271,10 +271,13 @@
struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
{
+ struct tty_ldisc *ld;
+
ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT);
- if (!tty->ldisc)
+ ld = tty->ldisc;
+ if (!ld)
ldsem_up_read(&tty->ldisc_sem);
- return tty->ldisc;
+ return ld;
}
EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
@@ -333,7 +336,7 @@
ldsem_up_write(&tty->ldisc_sem);
}
-static int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout)
+int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout)
{
int ret;
@@ -344,7 +347,7 @@
return 0;
}
-static void tty_ldisc_unlock(struct tty_struct *tty)
+void tty_ldisc_unlock(struct tty_struct *tty)
{
clear_bit(TTY_LDISC_HALTED, &tty->flags);
__tty_ldisc_unlock(tty);
@@ -489,41 +492,6 @@
}
/**
- * tty_ldisc_restore - helper for tty ldisc change
- * @tty: tty to recover
- * @old: previous ldisc
- *
- * Restore the previous line discipline or N_TTY when a line discipline
- * change fails due to an open error
- */
-
-static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
-{
- struct tty_ldisc *new_ldisc;
- int r;
-
- /* There is an outstanding reference here so this is safe */
- old = tty_ldisc_get(tty, old->ops->num);
- WARN_ON(IS_ERR(old));
- tty->ldisc = old;
- tty_set_termios_ldisc(tty, old->ops->num);
- if (tty_ldisc_open(tty, old) < 0) {
- tty_ldisc_put(old);
- /* This driver is always present */
- new_ldisc = tty_ldisc_get(tty, N_TTY);
- if (IS_ERR(new_ldisc))
- panic("n_tty: get");
- tty->ldisc = new_ldisc;
- tty_set_termios_ldisc(tty, N_TTY);
- r = tty_ldisc_open(tty, new_ldisc);
- if (r < 0)
- panic("Couldn't open N_TTY ldisc for "
- "%s --- error %d.",
- tty_name(tty), r);
- }
-}
-
-/**
* tty_set_ldisc - set line discipline
* @tty: the terminal to set
* @ldisc: the line discipline
@@ -536,12 +504,7 @@
int tty_set_ldisc(struct tty_struct *tty, int disc)
{
- int retval;
- struct tty_ldisc *old_ldisc, *new_ldisc;
-
- new_ldisc = tty_ldisc_get(tty, disc);
- if (IS_ERR(new_ldisc))
- return PTR_ERR(new_ldisc);
+ int retval, old_disc;
tty_lock(tty);
retval = tty_ldisc_lock(tty, 5 * HZ);
@@ -554,7 +517,8 @@
}
/* Check the no-op case */
- if (tty->ldisc->ops->num == disc)
+ old_disc = tty->ldisc->ops->num;
+ if (old_disc == disc)
goto out;
if (test_bit(TTY_HUPPED, &tty->flags)) {
@@ -563,34 +527,25 @@
goto out;
}
- old_ldisc = tty->ldisc;
-
- /* Shutdown the old discipline. */
- tty_ldisc_close(tty, old_ldisc);
-
- /* Now set up the new line discipline. */
- tty->ldisc = new_ldisc;
- tty_set_termios_ldisc(tty, disc);
-
- retval = tty_ldisc_open(tty, new_ldisc);
+ retval = tty_ldisc_reinit(tty, disc);
if (retval < 0) {
/* Back to the old one or N_TTY if we can't */
- tty_ldisc_put(new_ldisc);
- tty_ldisc_restore(tty, old_ldisc);
+ if (tty_ldisc_reinit(tty, old_disc) < 0) {
+ pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n");
+ if (tty_ldisc_reinit(tty, N_TTY) < 0) {
+ /* At this point we have tty->ldisc == NULL. */
+ pr_err("tty: reinitializing N_TTY failed\n");
+ }
+ }
}
- if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) {
+ if (tty->ldisc && tty->ldisc->ops->num != old_disc &&
+ tty->ops->set_ldisc) {
down_read(&tty->termios_rwsem);
tty->ops->set_ldisc(tty);
up_read(&tty->termios_rwsem);
}
- /* At this point we hold a reference to the new ldisc and a
- reference to the old ldisc, or we hold two references to
- the old ldisc (if it was restored as part of error cleanup
- above). In either case, releasing a single reference from
- the old ldisc is correct. */
- new_ldisc = old_ldisc;
out:
tty_ldisc_unlock(tty);
@@ -598,7 +553,6 @@
already running */
tty_buffer_restart_work(tty->port);
err:
- tty_ldisc_put(new_ldisc); /* drop the extra reference */
tty_unlock(tty);
return retval;
}
@@ -659,10 +613,8 @@
int retval;
ld = tty_ldisc_get(tty, disc);
- if (IS_ERR(ld)) {
- BUG_ON(disc == N_TTY);
+ if (IS_ERR(ld))
return PTR_ERR(ld);
- }
if (tty->ldisc) {
tty_ldisc_close(tty, tty->ldisc);
@@ -674,10 +626,8 @@
tty_set_termios_ldisc(tty, disc);
retval = tty_ldisc_open(tty, tty->ldisc);
if (retval) {
- if (!WARN_ON(disc == N_TTY)) {
- tty_ldisc_put(tty->ldisc);
- tty->ldisc = NULL;
- }
+ tty_ldisc_put(tty->ldisc);
+ tty->ldisc = NULL;
}
return retval;
}
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index ea20b2c..34d23cc 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -375,7 +375,7 @@
res = usb_submit_urb(acm->read_urbs[index], mem_flags);
if (res) {
- if (res != -EPERM) {
+ if (res != -EPERM && res != -ENODEV) {
dev_err(&acm->data->dev,
"urb %d failed submission with %d\n",
index, res);
@@ -1706,6 +1706,9 @@
{ USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */
.driver_info = SINGLE_RX_URB, /* firmware bug */
},
+ { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */
+ .driver_info = SINGLE_RX_URB,
+ },
{ USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 5008f71..7c54a19 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -550,6 +550,9 @@
unsigned iad_num = 0;
memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
+ nintf = nintf_orig = config->desc.bNumInterfaces;
+ config->desc.bNumInterfaces = 0; // Adjusted later
+
if (config->desc.bDescriptorType != USB_DT_CONFIG ||
config->desc.bLength < USB_DT_CONFIG_SIZE ||
config->desc.bLength > size) {
@@ -563,7 +566,6 @@
buffer += config->desc.bLength;
size -= config->desc.bLength;
- nintf = nintf_orig = config->desc.bNumInterfaces;
if (nintf > USB_MAXINTERFACES) {
dev_warn(ddev, "config %d has too many interfaces: %d, "
"using maximum allowed: %d\n",
@@ -900,14 +902,25 @@
}
}
+static const __u8 bos_desc_len[256] = {
+ [USB_CAP_TYPE_WIRELESS_USB] = USB_DT_USB_WIRELESS_CAP_SIZE,
+ [USB_CAP_TYPE_EXT] = USB_DT_USB_EXT_CAP_SIZE,
+ [USB_SS_CAP_TYPE] = USB_DT_USB_SS_CAP_SIZE,
+ [USB_SSP_CAP_TYPE] = USB_DT_USB_SSP_CAP_SIZE(1),
+ [CONTAINER_ID_TYPE] = USB_DT_USB_SS_CONTN_ID_SIZE,
+ [USB_PTM_CAP_TYPE] = USB_DT_USB_PTM_ID_SIZE,
+};
+
/* Get BOS descriptor set */
int usb_get_bos_descriptor(struct usb_device *dev)
{
struct device *ddev = &dev->dev;
struct usb_bos_descriptor *bos;
struct usb_dev_cap_header *cap;
+ struct usb_ssp_cap_descriptor *ssp_cap;
unsigned char *buffer;
- int length, total_len, num, i;
+ int length, total_len, num, i, ssac;
+ __u8 cap_type;
int ret;
bos = kzalloc(sizeof(struct usb_bos_descriptor), GFP_KERNEL);
@@ -960,7 +973,13 @@
dev->bos->desc->bNumDeviceCaps = i;
break;
}
+ cap_type = cap->bDevCapabilityType;
length = cap->bLength;
+ if (bos_desc_len[cap_type] && length < bos_desc_len[cap_type]) {
+ dev->bos->desc->bNumDeviceCaps = i;
+ break;
+ }
+
total_len -= length;
if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
@@ -968,7 +987,7 @@
continue;
}
- switch (cap->bDevCapabilityType) {
+ switch (cap_type) {
case USB_CAP_TYPE_WIRELESS_USB:
/* Wireless USB cap descriptor is handled by wusb */
break;
@@ -981,8 +1000,11 @@
(struct usb_ss_cap_descriptor *)buffer;
break;
case USB_SSP_CAP_TYPE:
- dev->bos->ssp_cap =
- (struct usb_ssp_cap_descriptor *)buffer;
+ ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
+ ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
+ USB_SSP_SUBLINK_SPEED_ATTRIBS);
+ if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
+ dev->bos->ssp_cap = ssp_cap;
break;
case CONTAINER_ID_TYPE:
dev->bos->ss_id =
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index fa61935..893ebae 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -134,42 +134,38 @@
#define USB_DEVICE_DEV MKDEV(USB_DEVICE_MAJOR, 0)
/* Limit on the total amount of memory we can allocate for transfers */
-static unsigned usbfs_memory_mb = 16;
+static u32 usbfs_memory_mb = 16;
module_param(usbfs_memory_mb, uint, 0644);
MODULE_PARM_DESC(usbfs_memory_mb,
"maximum MB allowed for usbfs buffers (0 = no limit)");
/* Hard limit, necessary to avoid arithmetic overflow */
-#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000)
+#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000)
-static atomic_t usbfs_memory_usage; /* Total memory currently allocated */
+static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */
/* Check whether it's okay to allocate more memory for a transfer */
-static int usbfs_increase_memory_usage(unsigned amount)
+static int usbfs_increase_memory_usage(u64 amount)
{
- unsigned lim;
+ u64 lim;
- /*
- * Convert usbfs_memory_mb to bytes, avoiding overflows.
- * 0 means use the hard limit (effectively unlimited).
- */
lim = ACCESS_ONCE(usbfs_memory_mb);
- if (lim == 0 || lim > (USBFS_XFER_MAX >> 20))
- lim = USBFS_XFER_MAX;
- else
- lim <<= 20;
+ lim <<= 20;
- atomic_add(amount, &usbfs_memory_usage);
- if (atomic_read(&usbfs_memory_usage) <= lim)
- return 0;
- atomic_sub(amount, &usbfs_memory_usage);
- return -ENOMEM;
+ atomic64_add(amount, &usbfs_memory_usage);
+
+ if (lim > 0 && atomic64_read(&usbfs_memory_usage) > lim) {
+ atomic64_sub(amount, &usbfs_memory_usage);
+ return -ENOMEM;
+ }
+
+ return 0;
}
/* Memory for a transfer is being deallocated */
-static void usbfs_decrease_memory_usage(unsigned amount)
+static void usbfs_decrease_memory_usage(u64 amount)
{
- atomic_sub(amount, &usbfs_memory_usage);
+ atomic64_sub(amount, &usbfs_memory_usage);
}
static int connected(struct usb_dev_state *ps)
@@ -1191,7 +1187,7 @@
if (!usb_maxpacket(dev, pipe, !(bulk.ep & USB_DIR_IN)))
return -EINVAL;
len1 = bulk.len;
- if (len1 >= USBFS_XFER_MAX)
+ if (len1 >= (INT_MAX - sizeof(struct urb)))
return -EINVAL;
ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb));
if (ret)
@@ -1458,13 +1454,19 @@
int number_of_packets = 0;
unsigned int stream_id = 0;
void *buf;
-
- if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
- USBDEVFS_URB_SHORT_NOT_OK |
+ unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK |
USBDEVFS_URB_BULK_CONTINUATION |
USBDEVFS_URB_NO_FSBR |
USBDEVFS_URB_ZERO_PACKET |
- USBDEVFS_URB_NO_INTERRUPT))
+ USBDEVFS_URB_NO_INTERRUPT;
+ /* USBDEVFS_URB_ISO_ASAP is a special case */
+ if (uurb->type == USBDEVFS_URB_TYPE_ISO)
+ mask |= USBDEVFS_URB_ISO_ASAP;
+
+ if (uurb->flags & ~mask)
+ return -EINVAL;
+
+ if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX)
return -EINVAL;
if (uurb->buffer_length > 0 && !uurb->buffer)
return -EINVAL;
@@ -1584,10 +1586,6 @@
return -EINVAL;
}
- if (uurb->buffer_length >= USBFS_XFER_MAX) {
- ret = -EINVAL;
- goto error;
- }
if (uurb->buffer_length > 0 &&
!access_ok(is_in ? VERIFY_WRITE : VERIFY_READ,
uurb->buffer, uurb->buffer_length)) {
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 706b3d6..d0d3f9e 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4925,6 +4925,15 @@
usb_put_dev(udev);
if ((status == -ENOTCONN) || (status == -ENOTSUPP))
break;
+
+ /* When halfway through our retry count, power-cycle the port */
+ if (i == (SET_CONFIG_TRIES / 2) - 1) {
+ dev_info(&port_dev->dev, "attempt power cycle\n");
+ usb_hub_set_port_power(hdev, hub, port1, false);
+ msleep(2 * hub_power_on_good_delay(hub));
+ usb_hub_set_port_power(hdev, hub, port1, true);
+ msleep(hub_power_on_good_delay(hub));
+ }
}
if (hub->hdev->parent ||
!hcd->driver->port_handed_over ||
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 37c418e..c05c4f8 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -57,10 +57,11 @@
/* Microsoft LifeCam-VX700 v2.0 */
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
- /* Logitech HD Pro Webcams C920, C920-C and C930e */
+ /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+ { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
/* Logitech ConferenceCam CC3000e */
{ USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -151,6 +152,12 @@
/* appletouch */
{ USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
+ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* ELSA MicroLink 56K */
+ { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c61ddbf..16c6712 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -3092,15 +3092,10 @@
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver)
return 0;
- ret = dwc3_gadget_run_stop(dwc, false, false);
- if (ret < 0)
- return ret;
-
+ dwc3_gadget_run_stop(dwc, false, false);
dwc3_disconnect_gadget(dwc);
__dwc3_gadget_stop(dwc);
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index afc581a..b47ef9e 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -150,7 +150,6 @@
struct usb_function *f,
struct usb_ep *_ep)
{
- struct usb_composite_dev *cdev = get_gadget_data(g);
struct usb_endpoint_descriptor *chosen_desc = NULL;
struct usb_descriptor_header **speed_desc = NULL;
@@ -229,8 +228,12 @@
_ep->maxburst = comp_desc->bMaxBurst + 1;
break;
default:
- if (comp_desc->bMaxBurst != 0)
+ if (comp_desc->bMaxBurst != 0) {
+ struct usb_composite_dev *cdev;
+
+ cdev = get_gadget_data(g);
ERROR(cdev, "ep0 bMaxBurst must be 0\n");
+ }
_ep->maxburst = 1;
break;
}
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 2ef4377..b1d22d8 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -300,6 +300,7 @@
ret = unregister_gadget(gi);
if (ret)
goto err;
+ kfree(name);
} else {
if (gi->composite.gadget_driver.udc_name) {
ret = -EBUSY;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 273320f..d90bf57 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1015,7 +1015,7 @@
else
ret = ep->status;
goto error_mutex;
- } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_KERNEL))) {
+ } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
ret = -ENOMEM;
} else {
req->buf = data;
@@ -2262,9 +2262,18 @@
int i;
if (len < sizeof(*d) ||
- d->bFirstInterfaceNumber >= ffs->interfaces_count ||
- !d->Reserved1)
+ d->bFirstInterfaceNumber >= ffs->interfaces_count)
return -EINVAL;
+ if (d->Reserved1 != 1) {
+ /*
+ * According to the spec, Reserved1 must be set to 1
+ * but older kernels incorrectly rejected non-zero
+ * values. We fix it here to avoid returning EINVAL
+ * in response to values we used to accept.
+ */
+ pr_debug("usb_ext_compat_desc::Reserved1 forced to 1\n");
+ d->Reserved1 = 1;
+ }
for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
if (d->Reserved2[i])
return -EINVAL;
@@ -3716,7 +3725,8 @@
ci = opts->func_inst.group.cg_item.ci_parent->ci_parent;
ffs_dev_unlock();
- unregister_gadget_item(ci);
+ if (test_bit(FFS_FL_BOUND, &ffs->flags))
+ unregister_gadget_item(ci);
return;
done:
ffs_dev_unlock();
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index c7689d0..f8a1881 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -594,6 +594,14 @@
opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U);
opts->streaming_maxburst = min(opts->streaming_maxburst, 15U);
+ /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */
+ if (opts->streaming_maxburst &&
+ (opts->streaming_maxpacket % 1024) != 0) {
+ opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024);
+ INFO(cdev, "overriding streaming_maxpacket to %d\n",
+ opts->streaming_maxpacket);
+ }
+
/* Fill in the FS/HS/SS Video Streaming specific descriptors from the
* module parameters.
*
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 31125a4..d7dcd39 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -2140,7 +2140,7 @@
.release = uvc_attr_release,
};
-#define UVCG_OPTS_ATTR(cname, conv, str2u, uxx, vnoc, limit) \
+#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit) \
static ssize_t f_uvc_opts_##cname##_show( \
struct config_item *item, char *page) \
{ \
@@ -2183,16 +2183,16 @@
return ret; \
} \
\
-UVC_ATTR(f_uvc_opts_, cname, aname)
+UVC_ATTR(f_uvc_opts_, cname, cname)
#define identity_conv(x) (x)
-UVCG_OPTS_ATTR(streaming_interval, identity_conv, kstrtou8, u8, identity_conv,
- 16);
-UVCG_OPTS_ATTR(streaming_maxpacket, le16_to_cpu, kstrtou16, u16, le16_to_cpu,
- 3072);
-UVCG_OPTS_ATTR(streaming_maxburst, identity_conv, kstrtou8, u8, identity_conv,
- 15);
+UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv,
+ kstrtou8, u8, identity_conv, 16);
+UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu,
+ kstrtou16, u16, le16_to_cpu, 3072);
+UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv,
+ kstrtou8, u8, identity_conv, 15);
#undef identity_conv
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index f69dbd4..b8534d3 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1819,8 +1819,10 @@
spin_lock_irq (&dev->lock);
value = -EINVAL;
- if (dev->buf)
+ if (dev->buf) {
+ kfree(kbuf);
goto fail;
+ }
dev->buf = kbuf;
/* full or low speed config */
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index d685d82..e97539f 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -913,7 +913,7 @@
return 0;
/* "high bandwidth" works only at high speed */
- if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11))
+ if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp_mult(desc) > 1)
return 0;
switch (type) {
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 33f3987..d133252 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1146,15 +1146,15 @@
*/
while (!list_empty(&ep->queue)) {
struct net2280_request *req;
- u32 tmp;
+ u32 req_dma_count;
req = list_entry(ep->queue.next,
struct net2280_request, queue);
if (!req->valid)
break;
rmb();
- tmp = le32_to_cpup(&req->td->dmacount);
- if ((tmp & BIT(VALID_BIT)) != 0)
+ req_dma_count = le32_to_cpup(&req->td->dmacount);
+ if ((req_dma_count & BIT(VALID_BIT)) != 0)
break;
/* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short"
@@ -1163,40 +1163,41 @@
*/
if (unlikely(req->td->dmadesc == 0)) {
/* paranoia */
- tmp = readl(&ep->dma->dmacount);
- if (tmp & DMA_BYTE_COUNT_MASK)
+ u32 const ep_dmacount = readl(&ep->dma->dmacount);
+
+ if (ep_dmacount & DMA_BYTE_COUNT_MASK)
break;
/* single transfer mode */
- dma_done(ep, req, tmp, 0);
+ dma_done(ep, req, req_dma_count, 0);
num_completed++;
break;
} else if (!ep->is_in &&
(req->req.length % ep->ep.maxpacket) &&
!(ep->dev->quirks & PLX_PCIE)) {
- tmp = readl(&ep->regs->ep_stat);
+ u32 const ep_stat = readl(&ep->regs->ep_stat);
/* AVOID TROUBLE HERE by not issuing short reads from
* your gadget driver. That helps avoids errata 0121,
* 0122, and 0124; not all cases trigger the warning.
*/
- if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) {
+ if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) {
ep_warn(ep->dev, "%s lost packet sync!\n",
ep->ep.name);
req->req.status = -EOVERFLOW;
} else {
- tmp = readl(&ep->regs->ep_avail);
- if (tmp) {
+ u32 const ep_avail = readl(&ep->regs->ep_avail);
+ if (ep_avail) {
/* fifo gets flushed later */
ep->out_overflow = 1;
ep_dbg(ep->dev,
"%s dma, discard %d len %d\n",
- ep->ep.name, tmp,
+ ep->ep.name, ep_avail,
req->req.length);
req->req.status = -EOVERFLOW;
}
}
}
- dma_done(ep, req, tmp, 0);
+ dma_done(ep, req, req_dma_count, 0);
num_completed++;
}
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index a97da64..8a365aa 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -1523,7 +1523,6 @@
td = phys_to_virt(addr);
addr2 = (dma_addr_t)td->next;
pci_pool_free(dev->data_requests, td, addr);
- td->next = 0x00;
addr = addr2;
}
req->chain_len = 1;
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index 7fa60f5..afd6b86 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -2534,9 +2534,10 @@
usb_del_gadget_udc(&udc->gadget);
pxa_cleanup_debugfs(udc);
- if (!IS_ERR_OR_NULL(udc->transceiver))
+ if (!IS_ERR_OR_NULL(udc->transceiver)) {
usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy);
- usb_put_phy(udc->transceiver);
+ usb_put_phy(udc->transceiver);
+ }
udc->transceiver = NULL;
the_controller = NULL;
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index bb89e24..2197a50 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -222,7 +222,7 @@
#define USB3_EP0_SS_MAX_PACKET_SIZE 512
#define USB3_EP0_HSFS_MAX_PACKET_SIZE 64
#define USB3_EP0_BUF_SIZE 8
-#define USB3_MAX_NUM_PIPES 30
+#define USB3_MAX_NUM_PIPES 6 /* This includes PIPE 0 */
#define USB3_WAIT_US 3
struct renesas_usb3;
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 1a2614a..3ff6468 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -837,7 +837,7 @@
default: /* unknown */
break;
}
- temp = (cap >> 8) & 0xff;
+ offset = (cap >> 8) & 0xff;
}
}
#endif
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index b7114c3..3b7d69c 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -996,6 +996,12 @@
if (!vdev)
return;
+ if (vdev->real_port == 0 ||
+ vdev->real_port > HCS_MAX_PORTS(xhci->hcs_params1)) {
+ xhci_dbg(xhci, "Bad vdev->real_port.\n");
+ goto out;
+ }
+
tt_list_head = &(xhci->rh_bw[vdev->real_port - 1].tts);
list_for_each_entry_safe(tt_info, next, tt_list_head, tt_list) {
/* is this a hub device that added a tt_info to the tts list */
@@ -1009,6 +1015,7 @@
}
}
}
+out:
/* we are now at a leaf device */
xhci_free_virt_device(xhci, slot_id);
}
@@ -1025,10 +1032,9 @@
return 0;
}
- xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags);
- if (!xhci->devs[slot_id])
+ dev = kzalloc(sizeof(*dev), flags);
+ if (!dev)
return 0;
- dev = xhci->devs[slot_id];
/* Allocate the (output) device context that will be used in the HC. */
dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags);
@@ -1076,9 +1082,18 @@
&xhci->dcbaa->dev_context_ptrs[slot_id],
le64_to_cpu(xhci->dcbaa->dev_context_ptrs[slot_id]));
+ xhci->devs[slot_id] = dev;
+
return 1;
fail:
- xhci_free_virt_device(xhci, slot_id);
+ if (dev->eps[0].ring)
+ xhci_ring_free(xhci, dev->eps[0].ring);
+ if (dev->in_ctx)
+ xhci_free_container_ctx(xhci, dev->in_ctx);
+ if (dev->out_ctx)
+ xhci_free_container_ctx(xhci, dev->out_ctx);
+ kfree(dev);
+
return 0;
}
diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index f2365a4..ce9e457 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -632,13 +632,13 @@
goto power_off_phys;
}
- if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
- xhci->shared_hcd->can_do_streams = 1;
-
ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
if (ret)
goto put_usb3_hcd;
+ if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
+ xhci->shared_hcd->can_do_streams = 1;
+
ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
if (ret)
goto dealloc_usb2_hcd;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index c87ef38..f6782a3 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -190,6 +190,9 @@
xhci->quirks |= XHCI_BROKEN_STREAMS;
}
if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+ pdev->device == 0x0014)
+ xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+ if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
pdev->device == 0x0015)
xhci->quirks |= XHCI_RESET_ON_RESUME;
if (pdev->vendor == PCI_VENDOR_ID_VIA)
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index ca8b0b1..dec1008 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -335,6 +335,7 @@
static struct platform_driver usb_xhci_driver = {
.probe = xhci_plat_probe,
.remove = xhci_plat_remove,
+ .shutdown = usb_hcd_platform_shutdown,
.driver = {
.name = "xhci-hcd",
.pm = DEV_PM_OPS,
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 63735b5..89a14d5 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3132,7 +3132,7 @@
{
u32 maxp, total_packet_count;
- /* MTK xHCI is mostly 0.97 but contains some features from 1.0 */
+ /* MTK xHCI 0.96 contains some features from 1.0 */
if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST))
return ((td_total_len - transferred) >> 10);
@@ -3141,8 +3141,8 @@
trb_buff_len == td_total_len)
return 0;
- /* for MTK xHCI, TD size doesn't include this TRB */
- if (xhci->quirks & XHCI_MTK_HOST)
+ /* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */
+ if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100))
trb_buff_len = 0;
maxp = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc));
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index 8e7737d..03be5d5 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -292,6 +292,8 @@
if (gpio_is_valid(hub->gpio_reset)) {
err = devm_gpio_request_one(dev, hub->gpio_reset,
GPIOF_OUT_INIT_LOW, "usb3503 reset");
+ /* Datasheet defines a hardware reset to be at least 100us */
+ usleep_range(100, 10000);
if (err) {
dev_err(dev,
"unable to request GPIO %d as reset pin (%d)\n",
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 1a874a1..80b37d2 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1002,7 +1002,9 @@
break;
case MON_IOCQ_RING_SIZE:
+ mutex_lock(&rp->fetch_lock);
ret = rp->b_size;
+ mutex_unlock(&rp->fetch_lock);
break;
case MON_IOCT_RING_SIZE:
@@ -1229,12 +1231,16 @@
unsigned long offset, chunk_idx;
struct page *pageptr;
+ mutex_lock(&rp->fetch_lock);
offset = vmf->pgoff << PAGE_SHIFT;
- if (offset >= rp->b_size)
+ if (offset >= rp->b_size) {
+ mutex_unlock(&rp->fetch_lock);
return VM_FAULT_SIGBUS;
+ }
chunk_idx = offset / CHUNK_SIZE;
pageptr = rp->b_vec[chunk_idx].pg;
get_page(pageptr);
+ mutex_unlock(&rp->fetch_lock);
vmf->page = pageptr;
return 0;
}
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index bacee0f..ea5bad4 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -302,7 +302,15 @@
musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
portstate(musb->port1_status |= USB_PORT_STAT_POWER);
del_timer(&otg_workaround);
- } else {
+ } else if (!(musb->int_usb & MUSB_INTR_BABBLE)){
+ /*
+ * When babble condition happens, drvvbus interrupt
+ * is also generated. Ignore this drvvbus interrupt
+ * and let babble interrupt handler recovers the
+ * controller; otherwise, the host-mode flag is lost
+ * due to the MUSB_DEV_MODE() call below and babble
+ * recovery logic will not called.
+ */
musb->is_active = 0;
MUSB_DEV_MODE(musb);
otg->default_a = 0;
diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c
index db68156..b3b33cf 100644
--- a/drivers/usb/phy/phy-isp1301.c
+++ b/drivers/usb/phy/phy-isp1301.c
@@ -33,6 +33,12 @@
};
MODULE_DEVICE_TABLE(i2c, isp1301_id);
+static const struct of_device_id isp1301_of_match[] = {
+ {.compatible = "nxp,isp1301" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, isp1301_of_match);
+
static struct i2c_client *isp1301_i2c_client;
static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear)
@@ -130,6 +136,7 @@
static struct i2c_driver isp1301_driver = {
.driver = {
.name = DRV_NAME,
+ .of_match_table = of_match_ptr(isp1301_of_match),
},
.probe = isp1301_probe,
.remove = isp1301_remove,
diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
index ab5d364..335a1ef 100644
--- a/drivers/usb/phy/phy-tahvo.c
+++ b/drivers/usb/phy/phy-tahvo.c
@@ -368,7 +368,8 @@
tu->extcon = devm_extcon_dev_allocate(&pdev->dev, tahvo_cable);
if (IS_ERR(tu->extcon)) {
dev_err(&pdev->dev, "failed to allocate memory for extcon\n");
- return -ENOMEM;
+ ret = PTR_ERR(tu->extcon);
+ goto err_disable_clk;
}
ret = devm_extcon_dev_register(&pdev->dev, tu->extcon);
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index 56ecb8b..584ae8c 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -63,6 +63,7 @@
- Google USB serial devices
- HP4x calculators
- a number of Motorola phones
+ - Motorola Tetra devices
- Novatel Wireless GPS receivers
- Siemens USB/MPI adapter.
- ViVOtech ViVOpay USB device.
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 11ee55e..3178d8a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -121,6 +121,7 @@
{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -171,6 +172,7 @@
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+ { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
{ USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 3249f42..0c743e4 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1017,6 +1017,7 @@
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+ { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
{ } /* Terminating entry */
};
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index f9d15bd..543d280 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -914,6 +914,12 @@
#define ICPDAS_I7563U_PID 0x0105
/*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */
+#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */
+
+/*
* RT Systems programming cables for various ham radios
*/
#define RTSYSTEMS_VID 0x2100 /* Vendor ID */
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index 464db17..de61271 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -2215,7 +2215,6 @@
/* something went wrong */
dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n",
__func__, status);
- usb_kill_urb(urb);
usb_free_urb(urb);
atomic_dec(&CmdUrbs);
return status;
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index db3d34c..1799aa0 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -236,11 +236,14 @@
/* These Quectel products use Qualcomm's vendor ID */
#define QUECTEL_PRODUCT_UC20 0x9003
#define QUECTEL_PRODUCT_UC15 0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5 0x9625
#define QUECTEL_VENDOR_ID 0x2c7c
/* These Quectel products use Quectel's vendor ID */
#define QUECTEL_PRODUCT_EC21 0x0121
#define QUECTEL_PRODUCT_EC25 0x0125
+#define QUECTEL_PRODUCT_BG96 0x0296
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
@@ -282,6 +285,7 @@
#define TELIT_PRODUCT_LE922_USBCFG3 0x1043
#define TELIT_PRODUCT_LE922_USBCFG5 0x1045
#define TELIT_PRODUCT_ME910 0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101
#define TELIT_PRODUCT_LE920 0x1200
#define TELIT_PRODUCT_LE910 0x1201
#define TELIT_PRODUCT_LE910_USBCFG4 0x1206
@@ -379,6 +383,9 @@
#define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603
#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01
+/* Fujisoft products */
+#define FUJISOFT_PRODUCT_FS040U 0x9b02
+
/* iBall 3.5G connect wireless modem */
#define IBALL_3_5G_CONNECT 0x9605
@@ -647,6 +654,11 @@
.reserved = BIT(1) | BIT(3),
};
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+ .sendsetup = BIT(0),
+ .reserved = BIT(3),
+};
+
static const struct option_blacklist_info telit_le910_blacklist = {
.sendsetup = BIT(0),
.reserved = BIT(1) | BIT(2),
@@ -676,6 +688,10 @@
.reserved = BIT(4) | BIT(5),
};
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+ .reserved = BIT(1) | BIT(4),
+};
+
static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1180,11 +1196,16 @@
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
{ USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ /* Yuga products use Qualcomm vendor ID */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+ .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
/* Quectel products using Quectel vendor ID */
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25),
.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1244,6 +1265,8 @@
.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+ .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
@@ -1877,6 +1900,8 @@
{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100),
.driver_info = (kernel_ulong_t)&four_g_w100_blacklist
},
+ {USB_DEVICE(LONGCHEER_VENDOR_ID, FUJISOFT_PRODUCT_FS040U),
+ .driver_info = (kernel_ulong_t)&net_intf3_blacklist},
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) },
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff),
.driver_info = (kernel_ulong_t)&net_intf3_blacklist },
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index a51b283..3da25ad 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -39,6 +39,7 @@
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) },
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) },
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) },
+ { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) },
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) },
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) },
{ USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 3b5a15d..1232890 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -17,6 +17,7 @@
#define PL2303_PRODUCT_ID_DCU11 0x1234
#define PL2303_PRODUCT_ID_PHAROS 0xaaa0
#define PL2303_PRODUCT_ID_RSAQ3 0xaaa2
+#define PL2303_PRODUCT_ID_CHILITAG 0xaaa8
#define PL2303_PRODUCT_ID_ALDIGA 0x0611
#define PL2303_PRODUCT_ID_MMX 0x0612
#define PL2303_PRODUCT_ID_GPRS 0x0609
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 4516291..fb6dc16 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -166,6 +166,8 @@
{DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */
{DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */
{DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */
+ {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */
+ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */
{DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -346,6 +348,7 @@
break;
case 2:
dev_dbg(dev, "NMEA GPS interface found\n");
+ sendsetup = true;
break;
case 3:
dev_dbg(dev, "Modem port found\n");
diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
index e98b6e5..6aa7ff2 100644
--- a/drivers/usb/serial/usb-serial-simple.c
+++ b/drivers/usb/serial/usb-serial-simple.c
@@ -80,6 +80,11 @@
{ USB_DEVICE(0x22b8, 0x2c64) } /* Motorola V950 phone */
DEVICE(moto_modem, MOTO_IDS);
+/* Motorola Tetra driver */
+#define MOTOROLA_TETRA_IDS() \
+ { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */
+DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS);
+
/* Novatel Wireless GPS driver */
#define NOVATEL_IDS() \
{ USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */
@@ -110,6 +115,7 @@
&google_device,
&vivopay_device,
&moto_modem_device,
+ &motorola_tetra_device,
&novatel_gps_device,
&hp4x_device,
&suunto_device,
@@ -125,6 +131,7 @@
GOOGLE_IDS(),
VIVOPAY_IDS(),
MOTO_IDS(),
+ MOTOROLA_TETRA_IDS(),
NOVATEL_IDS(),
HP4X_IDS(),
SUUNTO_IDS(),
diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h
index a155cd0..ecc83c4 100644
--- a/drivers/usb/storage/uas-detect.h
+++ b/drivers/usb/storage/uas-detect.h
@@ -111,6 +111,10 @@
}
}
+ /* All Seagate disk enclosures have broken ATA pass-through support */
+ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2)
+ flags |= US_FL_NO_ATA_1X;
+
usb_stor_adjust_quirks(udev, &flags);
if (flags & US_FL_IGNORE_UAS) {
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9876af4..6891e90 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -1076,20 +1076,19 @@
return 0;
err = uas_configure_endpoints(devinfo);
- if (err) {
+ if (err && err != ENODEV)
shost_printk(KERN_ERR, shost,
"%s: alloc streams error %d after reset",
__func__, err);
- return 1;
- }
+ /* we must unblock the host in every case lest we deadlock */
spin_lock_irqsave(shost->host_lock, flags);
scsi_report_bus_reset(shost, 0);
spin_unlock_irqrestore(shost->host_lock, flags);
scsi_unblock_requests(shost);
- return 0;
+ return err ? 1 : 0;
}
static int uas_suspend(struct usb_interface *intf, pm_message_t message)
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 2572fd5..b605115 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2113,6 +2113,13 @@
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_BROKEN_FUA ),
+/* Reported by David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+ "JMicron",
+ "JMS567",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA),
+
/*
* Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
* JMicron responds to USN and several other SCSI ioctls with a
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index cde1153..719ec68 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -142,6 +142,13 @@
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
+/* Reported-by: David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+ "JMicron",
+ "JMS567",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_BROKEN_FUA),
+
/* Reported-by: Hans de Goede <hdegoede@redhat.com> */
UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
"VIA",
@@ -149,6 +156,13 @@
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_ATA_1X),
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+ "Norelsys",
+ "NS1068X",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
/* Reported-by: Takeo Nakayama <javhera@gmx.com> */
UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
"JMicron",
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index c653ce5..1886d8e 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -163,8 +163,7 @@
* step 1?
*/
if (ud->tcp_socket) {
- dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
- ud->tcp_socket);
+ dev_dbg(&sdev->udev->dev, "shutdown sockfd\n");
kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
}
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index af10f7b..325b4c0 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -252,11 +252,12 @@
struct stub_priv *priv;
struct urb *urb;
- dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+ dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
while ((priv = stub_priv_pop(sdev))) {
urb = priv->urb;
- dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+ dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+ priv->seqnum);
usb_kill_urb(urb);
kmem_cache_free(stub_priv_cache, priv);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 191b176..5b80718 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -225,9 +225,6 @@
if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
continue;
- dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
- priv->urb);
-
/*
* This matched urb is not completed yet (i.e., be in
* flight in usb hcd hardware/driver). Now we are
@@ -266,8 +263,8 @@
ret = usb_unlink_urb(priv->urb);
if (ret != -EINPROGRESS)
dev_err(&priv->urb->dev->dev,
- "failed to unlink a urb %p, ret %d\n",
- priv->urb, ret);
+ "failed to unlink a urb # %lu, ret %d\n",
+ priv->seqnum, ret);
return 0;
}
@@ -336,23 +333,34 @@
return priv;
}
-static int get_pipe(struct stub_device *sdev, int epnum, int dir)
+static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
{
struct usb_device *udev = sdev->udev;
struct usb_host_endpoint *ep;
struct usb_endpoint_descriptor *epd = NULL;
+ int epnum = pdu->base.ep;
+ int dir = pdu->base.direction;
+
+ if (epnum < 0 || epnum > 15)
+ goto err_ret;
if (dir == USBIP_DIR_IN)
ep = udev->ep_in[epnum & 0x7f];
else
ep = udev->ep_out[epnum & 0x7f];
- if (!ep) {
- dev_err(&sdev->udev->dev, "no such endpoint?, %d\n",
- epnum);
- BUG();
- }
+ if (!ep)
+ goto err_ret;
epd = &ep->desc;
+
+ /* validate transfer_buffer_length */
+ if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) {
+ dev_err(&sdev->udev->dev,
+ "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n",
+ pdu->u.cmd_submit.transfer_buffer_length);
+ return -1;
+ }
+
if (usb_endpoint_xfer_control(epd)) {
if (dir == USBIP_DIR_OUT)
return usb_sndctrlpipe(udev, epnum);
@@ -375,15 +383,31 @@
}
if (usb_endpoint_xfer_isoc(epd)) {
+ /* validate packet size and number of packets */
+ unsigned int maxp, packets, bytes;
+
+ maxp = usb_endpoint_maxp(epd);
+ maxp *= usb_endpoint_maxp_mult(epd);
+ bytes = pdu->u.cmd_submit.transfer_buffer_length;
+ packets = DIV_ROUND_UP(bytes, maxp);
+
+ if (pdu->u.cmd_submit.number_of_packets < 0 ||
+ pdu->u.cmd_submit.number_of_packets > packets) {
+ dev_err(&sdev->udev->dev,
+ "CMD_SUBMIT: isoc invalid num packets %d\n",
+ pdu->u.cmd_submit.number_of_packets);
+ return -1;
+ }
if (dir == USBIP_DIR_OUT)
return usb_sndisocpipe(udev, epnum);
else
return usb_rcvisocpipe(udev, epnum);
}
+err_ret:
/* NOT REACHED */
- dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum);
- return 0;
+ dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum);
+ return -1;
}
static void masking_bogus_flags(struct urb *urb)
@@ -447,7 +471,10 @@
struct stub_priv *priv;
struct usbip_device *ud = &sdev->ud;
struct usb_device *udev = sdev->udev;
- int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
+ int pipe = get_pipe(sdev, pdu);
+
+ if (pipe == -1)
+ return;
priv = stub_priv_alloc(sdev, pdu);
if (!priv)
@@ -466,7 +493,8 @@
}
/* allocate urb transfer buffer, if needed */
- if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
+ if (pdu->u.cmd_submit.transfer_buffer_length > 0 &&
+ pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) {
priv->urb->transfer_buffer =
kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
GFP_KERNEL);
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index be50cef..96aa375 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -102,7 +102,7 @@
/* link a urb to the queue of tx. */
spin_lock_irqsave(&sdev->priv_lock, flags);
if (sdev->ud.tcp_socket == NULL) {
- usbip_dbg_stub_tx("ignore urb for closed connection %p", urb);
+ usbip_dbg_stub_tx("ignore urb for closed connection\n");
/* It will be freed in stub_device_cleanup_urbs(). */
} else if (priv->unlinking) {
stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -181,6 +181,13 @@
memset(&pdu_header, 0, sizeof(pdu_header));
memset(&msg, 0, sizeof(msg));
+ if (urb->actual_length > 0 && !urb->transfer_buffer) {
+ dev_err(&sdev->udev->dev,
+ "urb: actual_length %d transfer_buffer null\n",
+ urb->actual_length);
+ return -1;
+ }
+
if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
iovnum = 2 + urb->number_of_packets;
else
@@ -197,8 +204,8 @@
/* 1. setup usbip_header */
setup_ret_submit_pdu(&pdu_header, urb);
- usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
- pdu_header.base.seqnum, urb);
+ usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+ pdu_header.base.seqnum);
usbip_header_correct_endian(&pdu_header, 1);
iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 8b23229..2a5d318 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -105,7 +105,7 @@
dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
udev->devnum, udev->devpath, usb_speed_string(udev->speed));
- pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+ pr_debug("tt hub ttport %d\n", udev->ttport);
dev_dbg(dev, " ");
for (i = 0; i < 16; i++)
@@ -138,12 +138,8 @@
}
pr_debug("\n");
- dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
- dev_dbg(dev,
- "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
- &udev->descriptor, udev->config,
- udev->actconfig, udev->rawdescriptors);
+ dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+ udev->bus->bus_name);
dev_dbg(dev, "have_langid %d, string_langid %d\n",
udev->have_langid, udev->string_langid);
@@ -251,9 +247,6 @@
dev = &urb->dev->dev;
- dev_dbg(dev, " urb :%p\n", urb);
- dev_dbg(dev, " dev :%p\n", urb->dev);
-
usbip_dump_usb_device(urb->dev);
dev_dbg(dev, " pipe :%08x ", urb->pipe);
@@ -262,11 +255,9 @@
dev_dbg(dev, " status :%d\n", urb->status);
dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
- dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
dev_dbg(dev, " transfer_buffer_length:%d\n",
urb->transfer_buffer_length);
dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
- dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
usbip_dump_usb_ctrlrequest(
@@ -276,8 +267,6 @@
dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
dev_dbg(dev, " interval :%d\n", urb->interval);
dev_dbg(dev, " error_count :%d\n", urb->error_count);
- dev_dbg(dev, " context :%p\n", urb->context);
- dev_dbg(dev, " complete :%p\n", urb->complete);
}
EXPORT_SYMBOL_GPL(usbip_dump_urb);
@@ -335,13 +324,10 @@
char *bp = buf;
int osize = size;
- usbip_dbg_xmit("enter\n");
-
- if (!sock || !buf || !size) {
- pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
- size);
+ if (!sock || !buf || !size)
return -EINVAL;
- }
+
+ usbip_dbg_xmit("enter\n");
do {
sock->sk->sk_allocation = GFP_NOIO;
@@ -354,11 +340,8 @@
msg.msg_flags = MSG_NOSIGNAL;
result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL);
- if (result <= 0) {
- pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
- sock, buf, size, result, total);
+ if (result <= 0)
goto err;
- }
size -= result;
buf += result;
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
index 9f49037..f0b955f 100644
--- a/drivers/usb/usbip/usbip_common.h
+++ b/drivers/usb/usbip/usbip_common.h
@@ -271,6 +271,7 @@
/* lock for status */
spinlock_t lock;
+ int sockfd;
struct socket *tcp_socket;
struct task_struct *tcp_rx;
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index d6dc165..dbe615b 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -300,7 +300,7 @@
case USB_PORT_FEAT_POWER:
usbip_dbg_vhci_rh(
" ClearPortFeature: USB_PORT_FEAT_POWER\n");
- dum->port_status[rhport] = 0;
+ dum->port_status[rhport] &= ~USB_PORT_STAT_POWER;
dum->resuming = 0;
break;
case USB_PORT_FEAT_C_RESET:
@@ -506,9 +506,6 @@
struct vhci_device *vdev;
unsigned long flags;
- usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
- hcd, urb, mem_flags);
-
if (portnum > VHCI_HC_PORTS) {
pr_err("invalid port number %d\n", portnum);
return -ENODEV;
@@ -671,8 +668,6 @@
struct vhci_device *vdev;
unsigned long flags;
- pr_info("dequeue a urb %p\n", urb);
-
spin_lock_irqsave(&vhci->lock, flags);
priv = urb->hcpriv;
@@ -700,7 +695,6 @@
/* tcp connection is closed */
spin_lock(&vdev->priv_lock);
- pr_info("device %p seems to be disconnected\n", vdev);
list_del(&priv->list);
kfree(priv);
urb->hcpriv = NULL;
@@ -712,8 +706,6 @@
* vhci_rx will receive RET_UNLINK and give back the URB.
* Otherwise, we give back it here.
*/
- pr_info("gives back urb %p\n", urb);
-
usb_hcd_unlink_urb_from_ep(hcd, urb);
spin_unlock_irqrestore(&vhci->lock, flags);
@@ -741,8 +733,6 @@
unlink->unlink_seqnum = priv->seqnum;
- pr_info("device %p seems to be still connected\n", vdev);
-
/* send cmd_unlink and try to cancel the pending URB in the
* peer */
list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -823,7 +813,7 @@
/* need this? see stub_dev.c */
if (ud->tcp_socket) {
- pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+ pr_debug("shutdown tcp_socket\n");
kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
}
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index fc2d319..5943dee 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -37,24 +37,23 @@
urb = priv->urb;
status = urb->status;
- usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
- urb, priv, seqnum);
+ usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
switch (status) {
case -ENOENT:
/* fall through */
case -ECONNRESET:
- dev_info(&urb->dev->dev,
- "urb %p was unlinked %ssynchronuously.\n", urb,
- status == -ENOENT ? "" : "a");
+ dev_dbg(&urb->dev->dev,
+ "urb seq# %u was unlinked %ssynchronuously\n",
+ seqnum, status == -ENOENT ? "" : "a");
break;
case -EINPROGRESS:
/* no info output */
break;
default:
- dev_info(&urb->dev->dev,
- "urb %p may be in a error, status %d\n", urb,
- status);
+ dev_dbg(&urb->dev->dev,
+ "urb seq# %u may be in a error, status %d\n",
+ seqnum, status);
}
list_del(&priv->list);
@@ -80,8 +79,8 @@
spin_unlock_irqrestore(&vdev->priv_lock, flags);
if (!urb) {
- pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
- pr_info("max seqnum %d\n",
+ pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
+ pdu->base.seqnum,
atomic_read(&vhci->seqnum));
usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
return;
@@ -104,7 +103,7 @@
if (usbip_dbg_flag_vhci_rx)
usbip_dump_urb(urb);
- usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+ usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
spin_lock_irqsave(&vhci->lock, flags);
usb_hcd_unlink_urb_from_ep(vhci_to_hcd(vhci), urb);
@@ -170,7 +169,7 @@
pr_info("the urb (seqnum %d) was already given back\n",
pdu->base.seqnum);
} else {
- usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+ usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
/* If unlink is successful, status is -ECONNRESET */
urb->status = pdu->u.ret_unlink.status;
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index c404017..c287ccc 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -49,13 +49,17 @@
/*
* output example:
- * port sta spd dev socket local_busid
- * 0000 004 000 00000000 c5a7bb80 1-2.3
- * 0001 004 000 00000000 d8cee980 2-3.4
+ * port sta spd dev sockfd local_busid
+ * 0000 004 000 00000000 000003 1-2.3
+ * 0001 004 000 00000000 000004 2-3.4
*
- * IP address can be retrieved from a socket pointer address by looking
- * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
- * port number and its peer IP address.
+ * Output includes socket fd instead of socket pointer address to
+ * avoid leaking kernel memory address in:
+ * /sys/devices/platform/vhci_hcd.0/status and in debug output.
+ * The socket pointer address is not used at the moment and it was
+ * made visible as a convenient way to find IP address from socket
+ * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens
+ * a security hole, the change is made to use sockfd instead.
*/
for (i = 0; i < VHCI_HC_PORTS; i++) {
struct vhci_device *vdev = &vhci->vdev[i];
@@ -68,13 +72,13 @@
if (vdev->ud.status == VDEV_ST_USED) {
out += sprintf(out, "%03u %08x ",
vdev->speed, vdev->devid);
- out += sprintf(out, "%16p %s",
- vdev->ud.tcp_socket,
+ out += sprintf(out, "%06u %s",
+ vdev->ud.sockfd,
dev_name(&vdev->udev->dev));
} else {
out += sprintf(out, "000 00000000 ");
- out += sprintf(out, "0000000000000000 0-0");
+ out += sprintf(out, "000000 0-0");
}
out += sprintf(out, "\n");
@@ -125,7 +129,7 @@
int pdev_nr;
out += sprintf(out,
- "port sta spd dev socket local_busid\n");
+ "port sta spd dev sockfd local_busid\n");
pdev_nr = status_name_to_id(attr->attr.name);
if (pdev_nr < 0)
@@ -324,6 +328,7 @@
vdev->devid = devid;
vdev->speed = speed;
+ vdev->ud.sockfd = sockfd;
vdev->ud.tcp_socket = socket;
vdev->ud.status = VDEV_ST_NOTASSIGNED;
@@ -361,6 +366,7 @@
status->attr.attr.name = status->name;
status->attr.attr.mode = S_IRUGO;
status->attr.show = status_show;
+ sysfs_attr_init(&status->attr.attr);
}
static int init_status_attrs(void)
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index 3e7878f..a9a663a 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -83,7 +83,8 @@
memset(&msg, 0, sizeof(msg));
memset(&iov, 0, sizeof(iov));
- usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+ usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+ priv->seqnum);
/* 1. setup usbip_header */
setup_cmd_submit_pdu(&pdu_header, urb);
diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
index e429b59..d020e72 100644
--- a/drivers/usb/usbip/vudc_rx.c
+++ b/drivers/usb/usbip/vudc_rx.c
@@ -132,6 +132,25 @@
urb_p->new = 1;
urb_p->seqnum = pdu->base.seqnum;
+ if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+ /* validate packet size and number of packets */
+ unsigned int maxp, packets, bytes;
+
+ maxp = usb_endpoint_maxp(urb_p->ep->desc);
+ maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+ bytes = pdu->u.cmd_submit.transfer_buffer_length;
+ packets = DIV_ROUND_UP(bytes, maxp);
+
+ if (pdu->u.cmd_submit.number_of_packets < 0 ||
+ pdu->u.cmd_submit.number_of_packets > packets) {
+ dev_err(&udc->gadget.dev,
+ "CMD_SUBMIT: isoc invalid num packets %d\n",
+ pdu->u.cmd_submit.number_of_packets);
+ ret = -EMSGSIZE;
+ goto free_urbp;
+ }
+ }
+
ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
if (ret) {
usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
index 2346617..3ab4c86 100644
--- a/drivers/usb/usbip/vudc_tx.c
+++ b/drivers/usb/usbip/vudc_tx.c
@@ -97,6 +97,13 @@
memset(&pdu_header, 0, sizeof(pdu_header));
memset(&msg, 0, sizeof(msg));
+ if (urb->actual_length > 0 && !urb->transfer_buffer) {
+ dev_err(&udc->gadget.dev,
+ "urb: actual_length %d transfer_buffer null\n",
+ urb->actual_length);
+ return -1;
+ }
+
if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
iovnum = 2 + urb->number_of_packets;
else
@@ -112,8 +119,8 @@
/* 1. setup usbip_header */
setup_ret_submit_pdu(&pdu_header, urb_p);
- usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
- pdu_header.base.seqnum, urb);
+ usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+ pdu_header.base.seqnum);
usbip_header_correct_endian(&pdu_header, 1);
iov[iovnum].iov_base = &pdu_header;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 65d4a30..9f1ec43 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -851,11 +851,13 @@
/*
* Allow writes to device control fields, except devctl_phantom,
- * which could confuse IOMMU, and the ARI bit in devctl2, which
+ * which could confuse IOMMU, MPS, which can break communication
+ * with other physical devices, and the ARI bit in devctl2, which
* is set at probe time. FLR gets virtualized via our writefn.
*/
p_setw(perm, PCI_EXP_DEVCTL,
- PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
+ PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
+ ~PCI_EXP_DEVCTL_PHANTOM);
p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
return 0;
}
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 85d3e64..59b3f62 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1123,12 +1123,11 @@
mutex_lock(&container->lock);
ret = tce_iommu_create_default_window(container);
- if (ret)
- return ret;
-
- ret = tce_iommu_create_window(container, create.page_shift,
- create.window_size, create.levels,
- &create.start_addr);
+ if (!ret)
+ ret = tce_iommu_create_window(container,
+ create.page_shift,
+ create.window_size, create.levels,
+ &create.start_addr);
mutex_unlock(&container->lock);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 96a0661..e5b7652 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1078,6 +1078,7 @@
}
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n);
done:
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e3fad30..0ec970c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -218,6 +218,46 @@
return len;
}
+static int
+vhost_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+ struct vhost_vsock *vsock;
+ struct virtio_vsock_pkt *pkt, *n;
+ int cnt = 0;
+ LIST_HEAD(freeme);
+
+ /* Find the vhost_vsock according to guest context id */
+ vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
+ if (!vsock)
+ return -ENODEV;
+
+ spin_lock_bh(&vsock->send_pkt_list_lock);
+ list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
+ if (pkt->vsk != vsk)
+ continue;
+ list_move(&pkt->list, &freeme);
+ }
+ spin_unlock_bh(&vsock->send_pkt_list_lock);
+
+ list_for_each_entry_safe(pkt, n, &freeme, list) {
+ if (pkt->reply)
+ cnt++;
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ if (cnt) {
+ struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
+ int new_cnt;
+
+ new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
+ if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
+ vhost_poll_queue(&tx_vq->poll);
+ }
+
+ return 0;
+}
+
static struct virtio_vsock_pkt *
vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
unsigned int out, unsigned int in)
@@ -669,6 +709,7 @@
.release = virtio_transport_release,
.connect = virtio_transport_connect,
.shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vhost_transport_cancel_pkt,
.dgram_enqueue = virtio_transport_dgram_enqueue,
.dgram_dequeue = virtio_transport_dgram_dequeue,
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 1261400..d95ae09 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -79,14 +79,17 @@
static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness)
{
unsigned int lth = pb->lth_brightness;
- int duty_cycle;
+ u64 duty_cycle;
if (pb->levels)
duty_cycle = pb->levels[brightness];
else
duty_cycle = brightness;
- return (duty_cycle * (pb->period - lth) / pb->scale) + lth;
+ duty_cycle *= pb->period - lth;
+ do_div(duty_cycle, pb->scale);
+
+ return duty_cycle + lth;
}
static int pwm_backlight_update_status(struct backlight_device *bl)
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 6c2b2ca..44c2be1 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1681,8 +1681,10 @@
fbi = framebuffer_alloc(sizeof(struct au1200fb_device),
&dev->dev);
- if (!fbi)
+ if (!fbi) {
+ ret = -ENOMEM;
goto failed;
+ }
_au1200fb_infos[plane] = fbi;
fbdev = fbi->par;
@@ -1700,7 +1702,8 @@
if (!fbdev->fb_mem) {
print_err("fail to allocate frambuffer (size: %dK))",
fbdev->fb_len / 1024);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto failed;
}
/*
diff --git a/drivers/video/fbdev/controlfb.h b/drivers/video/fbdev/controlfb.h
index 6026c60..261522f 100644
--- a/drivers/video/fbdev/controlfb.h
+++ b/drivers/video/fbdev/controlfb.h
@@ -141,5 +141,7 @@
{{ 1, 2}}, /* 1152x870, 75Hz */
{{ 0, 1}}, /* 1280x960, 75Hz */
{{ 0, 1}}, /* 1280x1024, 75Hz */
+ {{ 1, 2}}, /* 1152x768, 60Hz */
+ {{ 0, 1}}, /* 1600x1024, 60Hz */
};
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index e9c2f7b..53326ba 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -769,11 +769,11 @@
for (i = 0; i < len; i++) {
ret = usb_control_msg(dev->udev,
- usb_rcvctrlpipe(dev->udev, 0), (0x02),
- (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2,
- HZ);
- if (ret < 1) {
- pr_err("Read EDID byte %d failed err %x\n", i, ret);
+ usb_rcvctrlpipe(dev->udev, 0), 0x02,
+ (0x80 | (0x02 << 5)), i << 8, 0xA1,
+ rbuf, 2, USB_CTRL_GET_TIMEOUT);
+ if (ret < 2) {
+ pr_err("Read EDID byte %d failed: %d\n", i, ret);
i--;
break;
}
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7062bb0..462e183 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -323,6 +323,8 @@
/* device_register() causes the bus infrastructure to look for a
* matching driver. */
err = device_register(&dev->dev);
+ if (err)
+ ida_simple_remove(&virtio_index_ida, dev->index);
out:
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 2c2e679..a7c08cc 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -241,11 +241,11 @@
#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
-static void update_balloon_stats(struct virtio_balloon *vb)
+static unsigned int update_balloon_stats(struct virtio_balloon *vb)
{
unsigned long events[NR_VM_EVENT_ITEMS];
struct sysinfo i;
- int idx = 0;
+ unsigned int idx = 0;
long available;
all_vm_events(events);
@@ -253,18 +253,22 @@
available = si_mem_available();
+#ifdef CONFIG_VM_EVENT_COUNTERS
update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
pages_to_bytes(events[PSWPIN]));
update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
pages_to_bytes(events[PSWPOUT]));
update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+#endif
update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
pages_to_bytes(i.freeram));
update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
pages_to_bytes(i.totalram));
update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL,
pages_to_bytes(available));
+
+ return idx;
}
/*
@@ -290,14 +294,14 @@
{
struct virtqueue *vq;
struct scatterlist sg;
- unsigned int len;
+ unsigned int len, num_stats;
- update_balloon_stats(vb);
+ num_stats = update_balloon_stats(vb);
vq = vb->stats_vq;
if (!virtqueue_get_buf(vq, &len))
return;
- sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+ sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
virtqueue_kick(vq);
}
@@ -421,15 +425,16 @@
vb->deflate_vq = vqs[1];
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
+ unsigned int num_stats;
vb->stats_vq = vqs[2];
/*
* Prime this virtqueue with one buffer so the hypervisor can
* use it to signal us later (it can't be broken yet!).
*/
- update_balloon_stats(vb);
+ num_stats = update_balloon_stats(vb);
- sg_init_one(&sg, vb->stats, sizeof vb->stats);
+ sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
< 0)
BUG();
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 4874b0f..518dfa1 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -169,15 +169,21 @@
return 0;
}
-static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
- unsigned int new_timeout)
+static void __imx2_wdt_set_timeout(struct watchdog_device *wdog,
+ unsigned int new_timeout)
{
struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
- wdog->timeout = new_timeout;
-
regmap_update_bits(wdev->regmap, IMX2_WDT_WCR, IMX2_WDT_WCR_WT,
WDOG_SEC_TO_COUNT(new_timeout));
+}
+
+static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
+ unsigned int new_timeout)
+{
+ __imx2_wdt_set_timeout(wdog, new_timeout);
+
+ wdog->timeout = new_timeout;
return 0;
}
@@ -371,7 +377,11 @@
/* The watchdog IP block is running */
if (imx2_wdt_is_running(wdev)) {
- imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
+ /*
+ * Don't update wdog->timeout, we'll restore the current value
+ * during resume.
+ */
+ __imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
imx2_wdt_ping(wdog);
}
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 1e9d2f8..1592dc6 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -362,7 +362,7 @@
{
struct afs_server *server;
struct afs_vnode *vnode, *xvnode;
- time_t now;
+ time64_t now;
long timeout;
int ret;
@@ -370,7 +370,7 @@
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find the first vnode to update */
spin_lock(&server->cb_lock);
@@ -424,7 +424,8 @@
/* and then reschedule */
_debug("reschedule");
- vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+ vnode->update_at = ktime_get_real_seconds() +
+ afs_vnode_update_timeout;
spin_lock(&server->cb_lock);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index d764236..168f2a4 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -106,6 +106,9 @@
case CBProbe:
call->type = &afs_SRXCBProbe;
return true;
+ case CBProbeUuid:
+ call->type = &afs_SRXCBProbeUuid;
+ return true;
case CBTellMeAboutYourself:
call->type = &afs_SRXCBTellMeAboutYourself;
return true;
@@ -165,7 +168,6 @@
struct afs_callback *cb;
struct afs_server *server;
__be32 *bp;
- u32 tmp;
int ret, loop;
_enter("{%u}", call->unmarshall);
@@ -227,9 +229,9 @@
if (ret < 0)
return ret;
- tmp = ntohl(call->tmp);
- _debug("CB count: %u", tmp);
- if (tmp != call->count && tmp != 0)
+ call->count2 = ntohl(call->tmp);
+ _debug("CB count: %u", call->count2);
+ if (call->count2 != call->count && call->count2 != 0)
return -EBADMSG;
call->offset = 0;
call->unmarshall++;
@@ -237,14 +239,14 @@
case 4:
_debug("extract CB array");
ret = afs_extract_data(call, call->buffer,
- call->count * 3 * 4, false);
+ call->count2 * 3 * 4, false);
if (ret < 0)
return ret;
_debug("unmarshall CB array");
cb = call->request;
bp = call->buffer;
- for (loop = call->count; loop > 0; loop--, cb++) {
+ for (loop = call->count2; loop > 0; loop--, cb++) {
cb->version = ntohl(*bp++);
cb->expiry = ntohl(*bp++);
cb->type = ntohl(*bp++);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6344aee..7237297 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -29,6 +29,7 @@
const struct file_operations afs_file_operations = {
.open = afs_open,
+ .flush = afs_flush,
.release = afs_release,
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 31c616a..88e44060 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -105,7 +105,7 @@
vnode->vfs_inode.i_mode = mode;
}
- vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
+ vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client;
vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
vnode->vfs_inode.i_version = data_version;
@@ -139,7 +139,7 @@
vnode->cb_version = ntohl(*bp++);
vnode->cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds();
*_bp = bp;
}
@@ -676,8 +676,8 @@
memset(bp, 0, padsz);
bp = (void *) bp + padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
@@ -945,8 +945,8 @@
memset(bp, 0, c_padsz);
bp = (void *) bp + c_padsz;
}
- *bp++ = htonl(AFS_SET_MODE);
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
@@ -1145,8 +1145,8 @@
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
@@ -1178,7 +1178,7 @@
_enter(",%x,{%x:%u},,",
key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
- size = to - offset;
+ size = (loff_t)to - (loff_t)offset;
if (first != last)
size += (loff_t)(last - first) << PAGE_SHIFT;
pos = (loff_t)first << PAGE_SHIFT;
@@ -1222,8 +1222,8 @@
*bp++ = htonl(vnode->fid.vnode);
*bp++ = htonl(vnode->fid.unique);
- *bp++ = 0; /* mask */
- *bp++ = 0; /* mtime */
+ *bp++ = htonl(AFS_SET_MTIME); /* mask */
+ *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = 0; /* unix mode */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 86cc726..42582e4 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -70,9 +70,9 @@
set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
- inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_gid = vnode->status.group;
inode->i_size = vnode->status.size;
- inode->i_ctime.tv_sec = vnode->status.mtime_server;
+ inode->i_ctime.tv_sec = vnode->status.mtime_client;
inode->i_ctime.tv_nsec = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime;
inode->i_blocks = 0;
@@ -245,12 +245,13 @@
vnode->cb_version = 0;
vnode->cb_expiry = 0;
vnode->cb_type = 0;
- vnode->cb_expires = get_seconds();
+ vnode->cb_expires = ktime_get_real_seconds();
} else {
vnode->cb_version = cb->version;
vnode->cb_expiry = cb->expiry;
vnode->cb_type = cb->type;
- vnode->cb_expires = vnode->cb_expiry + get_seconds();
+ vnode->cb_expires = vnode->cb_expiry +
+ ktime_get_real_seconds();
}
}
@@ -323,7 +324,7 @@
!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
!test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- if (vnode->cb_expires < get_seconds() + 10) {
+ if (vnode->cb_expires < ktime_get_real_seconds() + 10) {
_debug("callback expired");
set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
} else {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 535a38d..dd98dcd 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -11,6 +11,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/ktime.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/rxrpc.h>
@@ -105,7 +106,10 @@
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
- unsigned last_to; /* amount of mapping[last] */
+ union {
+ unsigned last_to; /* amount of mapping[last] */
+ unsigned count2; /* count used in unmarshalling */
+ };
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
@@ -242,7 +246,7 @@
*/
struct afs_vlocation {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct list_head link; /* link in cell volume location list */
struct list_head grave; /* link in master graveyard list */
struct list_head update; /* link in master update list */
@@ -253,7 +257,7 @@
struct afs_cache_vlocation vldb; /* volume information DB record */
struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
wait_queue_head_t waitq; /* status change waitqueue */
- time_t update_at; /* time at which record should be updated */
+ time64_t update_at; /* time at which record should be updated */
spinlock_t lock; /* access lock */
afs_vlocation_state_t state; /* volume location state */
unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */
@@ -266,7 +270,7 @@
*/
struct afs_server {
atomic_t usage;
- time_t time_of_death; /* time at which put reduced usage to 0 */
+ time64_t time_of_death; /* time at which put reduced usage to 0 */
struct in_addr addr; /* server address */
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
@@ -369,8 +373,8 @@
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
struct work_struct cb_broken_work; /* work to be done on callback break */
- time_t cb_expires; /* time at which callback expires */
- time_t cb_expires_at; /* time used to order cb_promise */
+ time64_t cb_expires; /* time at which callback expires */
+ time64_t cb_expires_at; /* time used to order cb_promise */
unsigned cb_version; /* callback version */
unsigned cb_expiry; /* callback expiry time */
afs_callback_type_t cb_type; /* type of callback */
@@ -749,6 +753,7 @@
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_flush(struct file *, fl_owner_t);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 91ea1aa..100b207 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -84,6 +84,8 @@
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGEN_OPCODE: return -ENOTSUPP;
+
default: return -EREMOTEIO;
}
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 25f05a8..523b1d3 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -321,6 +321,8 @@
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
+ size_t offset;
+ u32 abort_code;
int ret;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -368,9 +370,11 @@
msg.msg_controllen = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
- /* have to change the state *before* sending the last packet as RxRPC
- * might give us the reply before it returns from sending the
- * request */
+ /* We have to change the state *before* sending the last packet as
+ * rxrpc might give us the reply before it returns from sending the
+ * request. Further, if the send fails, we may already have been given
+ * a notification and may have collected it.
+ */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, rxcall,
@@ -389,7 +393,17 @@
return wait_mode->wait(call);
error_do_abort:
- rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
+ call->state = AFS_CALL_COMPLETE;
+ if (ret != -ECONNABORTED) {
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT,
+ -ret, "KSD");
+ } else {
+ abort_code = 0;
+ offset = 0;
+ rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
+ false, &abort_code);
+ ret = call->type->abort_to_error(abort_code);
+ }
error_kill_call:
afs_end_call(call);
_leave(" = %d", ret);
@@ -434,16 +448,18 @@
case -EINPROGRESS:
case -EAGAIN:
goto out;
+ case -ECONNABORTED:
+ goto call_complete;
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KNC");
- goto do_abort;
+ goto save_error;
case -ENOTSUPP:
- abort_code = RX_INVALID_OPERATION;
+ abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, -ret, "KIV");
- goto do_abort;
+ goto save_error;
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
@@ -453,7 +469,7 @@
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
abort_code, EBADMSG, "KUM");
- goto do_abort;
+ goto save_error;
}
}
@@ -464,8 +480,9 @@
_leave("");
return;
-do_abort:
+save_error:
call->error = ret;
+call_complete:
call->state = AFS_CALL_COMPLETE;
goto done;
}
@@ -475,7 +492,6 @@
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
- const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -494,13 +510,8 @@
continue;
}
- abort_why = "KWC";
- ret = call->error;
- if (call->state == AFS_CALL_COMPLETE)
- break;
- abort_why = "KWI";
- ret = -EINTR;
- if (signal_pending(current))
+ if (call->state == AFS_CALL_COMPLETE ||
+ signal_pending(current))
break;
schedule();
}
@@ -508,13 +519,14 @@
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* kill the call */
+ /* Kill off the call if it's still live. */
if (call->state < AFS_CALL_COMPLETE) {
- _debug("call incomplete");
+ _debug("call interrupted");
rxrpc_kernel_abort_call(afs_socket, call->rxcall,
- RX_CALL_DEAD, -ret, abort_why);
+ RX_USER_ABORT, -EINTR, "KWI");
}
+ ret = call->error;
_debug("call complete");
afs_end_call(call);
_leave(" = %d", ret);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 8d01042..bfa9d34 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -340,17 +340,22 @@
} else {
if (!(access & AFS_ACE_LOOKUP))
goto permission_denied;
+ if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
+ goto permission_denied;
if (mask & (MAY_EXEC | MAY_READ)) {
if (!(access & AFS_ACE_READ))
goto permission_denied;
+ if (!(inode->i_mode & S_IRUSR))
+ goto permission_denied;
} else if (mask & MAY_WRITE) {
if (!(access & AFS_ACE_WRITE))
goto permission_denied;
+ if (!(inode->i_mode & S_IWUSR))
+ goto permission_denied;
}
}
key_put(key);
- ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d4066ab..c001b1f 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -242,7 +242,7 @@
spin_lock(&afs_server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &afs_server_graveyard);
- server->time_of_death = get_seconds();
+ server->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_server_reaper,
afs_server_timeout * HZ);
}
@@ -277,9 +277,9 @@
LIST_HEAD(corpses);
struct afs_server *server;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_server_graveyard_lock);
while (!list_empty(&afs_server_graveyard)) {
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 45a8639..92bd555 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -340,7 +340,8 @@
struct afs_vlocation *xvl;
/* wait at least 10 minutes before updating... */
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
@@ -506,7 +507,7 @@
if (atomic_read(&vl->usage) == 0) {
_debug("buried");
list_move_tail(&vl->grave, &afs_vlocation_graveyard);
- vl->time_of_death = get_seconds();
+ vl->time_of_death = ktime_get_real_seconds();
queue_delayed_work(afs_wq, &afs_vlocation_reap,
afs_vlocation_timeout * HZ);
@@ -543,11 +544,11 @@
LIST_HEAD(corpses);
struct afs_vlocation *vl;
unsigned long delay, expiry;
- time_t now;
+ time64_t now;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
spin_lock(&afs_vlocation_graveyard_lock);
while (!list_empty(&afs_vlocation_graveyard)) {
@@ -622,13 +623,13 @@
{
struct afs_cache_vlocation vldb;
struct afs_vlocation *vl, *xvl;
- time_t now;
+ time64_t now;
long timeout;
int ret;
_enter("");
- now = get_seconds();
+ now = ktime_get_real_seconds();
/* find a record to update */
spin_lock(&afs_vlocation_updates_lock);
@@ -684,7 +685,8 @@
/* and then reschedule */
_debug("reschedule");
- vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+ vl->update_at = ktime_get_real_seconds() +
+ afs_vlocation_update_timeout;
spin_lock(&afs_vlocation_updates_lock);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index f865c3f..3fba2b5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -148,12 +148,12 @@
kfree(candidate);
return -ENOMEM;
}
- *pagep = page;
- /* page won't leak in error case: it eventually gets cleaned off LRU */
if (!PageUptodate(page) && len != PAGE_SIZE) {
ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
kfree(candidate);
_leave(" = %d [prep]", ret);
return ret;
@@ -161,6 +161,9 @@
SetPageUptodate(page);
}
+ /* page won't leak in error case: it eventually gets cleaned off LRU */
+ *pagep = page;
+
try_again:
spin_lock(&vnode->writeback_lock);
@@ -296,10 +299,14 @@
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
- ClearPageUptodate(pv.pages[loop]);
+ struct page *page = pv.pages[loop];
+ ClearPageUptodate(page);
if (error)
- SetPageError(pv.pages[loop]);
- end_page_writeback(pv.pages[loop]);
+ SetPageError(page);
+ if (PageWriteback(page))
+ end_page_writeback(page);
+ if (page->index >= first)
+ first = page->index + 1;
}
__pagevec_release(&pv);
@@ -502,6 +509,7 @@
if (PageWriteback(page) || !PageDirty(page)) {
unlock_page(page);
+ put_page(page);
continue;
}
@@ -735,6 +743,20 @@
}
/*
+ * Flush out all outstanding writes on a file opened for writing when it is
+ * closed.
+ */
+int afs_flush(struct file *file, fl_owner_t id)
+{
+ _enter("");
+
+ if ((file->f_mode & FMODE_WRITE) == 0)
+ return 0;
+
+ return vfs_fsync(file, 0);
+}
+
+/*
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 4c71dba..0ea31a5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -176,7 +176,6 @@
mutex_unlock(&sbi->wq_mutex);
- if (autofs4_write(sbi, pipe, &pkt, pktsz))
switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
case 0:
break;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 705bb5f..a29730c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3397,13 +3397,6 @@
goto again;
}
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
@@ -3427,6 +3420,13 @@
}
WARN_ON(ret);
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
if (i_size_read(inode) > 0) {
ret = btrfs_check_trunc_cache_free_space(root,
&root->fs_info->global_block_rsv);
@@ -9362,6 +9362,7 @@
ret = btrfs_del_root(trans, tree_root, &root->root_key);
if (ret) {
btrfs_abort_transaction(trans, ret);
+ err = ret;
goto out_end_trans;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e4b48f3..c56253a 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1253,7 +1253,7 @@
/* Lock all pages first so we can lock the extent safely. */
ret = io_ctl_prepare_pages(io_ctl, inode, 0);
if (ret)
- goto out;
+ goto out_unlock;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
&cached_state);
@@ -1346,6 +1346,7 @@
out_nospc:
cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
+out_unlock:
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f089d7d..a8a1fb4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2063,8 +2063,15 @@
goto out;
}
- btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state,
- 0);
+ ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+ &cached_state, 0);
+ if (ret) {
+ mapping_set_error(page->mapping, ret);
+ end_extent_writepage(page, ret, page_start, page_end);
+ ClearPageChecked(page);
+ goto out;
+ }
+
ClearPageChecked(page);
set_page_dirty(page);
out:
@@ -6812,6 +6819,20 @@
max_size = min_t(unsigned long, PAGE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
+
+ /*
+ * decompression code contains a memset to fill in any space between the end
+ * of the uncompressed data and the end of max_size in case the decompressed
+ * data ends up shorter than ram_bytes. That doesn't cover the hole between
+ * the end of an inline extent and the beginning of the next block, so we
+ * cover that region here.
+ */
+
+ if (max_size + pg_offset < PAGE_SIZE) {
+ char *map = kmap(page);
+ memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
+ kunmap(page);
+ }
kfree(tmp);
return ret;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 77f9efc..9a47b55 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6196,8 +6196,13 @@
goto out;
}
+ /*
+ * Check that we don't overflow at later allocations, we request
+ * clone_sources_count + 1 items, and compare to unsigned long inside
+ * access_ok.
+ */
if (arg->clone_sources_count >
- ULLONG_MAX / sizeof(*arg->clone_sources)) {
+ ULONG_MAX / sizeof(struct clone_root) - 1) {
ret = -EINVAL;
goto out;
}
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 6e14404..a724d9a 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -501,7 +501,8 @@
path = btrfs_alloc_path();
if (!path) {
test_msg("Couldn't allocate path\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
ret = add_block_group_free_space(&trans, root->fs_info, cache);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c0f52c4..3d2639c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1396,6 +1396,29 @@
return request_close_session(mdsc, session);
}
+static bool drop_negative_children(struct dentry *dentry)
+{
+ struct dentry *child;
+ bool all_negative = true;
+
+ if (!d_is_dir(dentry))
+ goto out;
+
+ spin_lock(&dentry->d_lock);
+ list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+ if (d_really_is_positive(child)) {
+ all_negative = false;
+ break;
+ }
+ }
+ spin_unlock(&dentry->d_lock);
+
+ if (all_negative)
+ shrink_dcache_parent(dentry);
+out:
+ return all_negative;
+}
+
/*
* Trim old(er) caps.
*
@@ -1441,16 +1464,27 @@
if ((used | wanted) & ~oissued & mine)
goto out; /* we need these caps */
- session->s_trim_caps--;
if (oissued) {
/* we aren't the only cap.. just remove us */
__ceph_remove_cap(cap, true);
+ session->s_trim_caps--;
} else {
+ struct dentry *dentry;
/* try dropping referring dentries */
spin_unlock(&ci->i_ceph_lock);
- d_prune_aliases(inode);
- dout("trim_caps_cb %p cap %p pruned, count now %d\n",
- inode, cap, atomic_read(&inode->i_count));
+ dentry = d_find_any_alias(inode);
+ if (dentry && drop_negative_children(dentry)) {
+ int count;
+ dput(dentry);
+ d_prune_aliases(inode);
+ count = atomic_read(&inode->i_count);
+ if (count == 1)
+ session->s_trim_caps--;
+ dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+ inode, cap, count);
+ } else {
+ dput(dentry);
+ }
return 0;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 5eb0412..73360df 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -318,9 +318,8 @@
{
int i;
int rc;
- char password_with_pad[CIFS_ENCPWD_SIZE];
+ char password_with_pad[CIFS_ENCPWD_SIZE] = {0};
- memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
if (password)
strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7710dd9..c4a27f6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1667,7 +1667,7 @@
tmp_end++;
if (!(tmp_end < end && tmp_end[1] == delim)) {
/* No it is not. Set the password to NULL */
- kfree(vol->password);
+ kzfree(vol->password);
vol->password = NULL;
break;
}
@@ -1705,7 +1705,7 @@
options = end;
}
- kfree(vol->password);
+ kzfree(vol->password);
/* Now build new password string */
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
@@ -4159,7 +4159,7 @@
reset_cifs_unix_caps(0, tcon, NULL, vol_info);
out:
kfree(vol_info->username);
- kfree(vol_info->password);
+ kzfree(vol_info->password);
kfree(vol_info);
return tcon;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cf192f9..02e403a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3285,20 +3285,18 @@
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
- int rc, xid;
+ int xid, rc = 0;
struct inode *inode = file_inode(file);
xid = get_xid();
- if (!CIFS_CACHE_READ(CIFS_I(inode))) {
+ if (!CIFS_CACHE_READ(CIFS_I(inode)))
rc = cifs_zap_mapping(inode);
- if (rc)
- return rc;
- }
-
- rc = generic_file_mmap(file, vma);
- if (rc == 0)
+ if (!rc)
+ rc = generic_file_mmap(file, vma);
+ if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
+
free_xid(xid);
return rc;
}
@@ -3308,16 +3306,16 @@
int rc, xid;
xid = get_xid();
+
rc = cifs_revalidate_file(file);
- if (rc) {
+ if (rc)
cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
rc);
- free_xid(xid);
- return rc;
- }
- rc = generic_file_mmap(file, vma);
- if (rc == 0)
+ if (!rc)
+ rc = generic_file_mmap(file, vma);
+ if (!rc)
vma->vm_ops = &cifs_file_vm_ops;
+
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5419afe..323d8e3 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -99,14 +99,11 @@
kfree(buf_to_free->serverOS);
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
- if (buf_to_free->password) {
- memset(buf_to_free->password, 0, strlen(buf_to_free->password));
- kfree(buf_to_free->password);
- }
+ kzfree(buf_to_free->password);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
- kfree(buf_to_free->auth_key.response);
- kfree(buf_to_free);
+ kzfree(buf_to_free->auth_key.response);
+ kzfree(buf_to_free);
}
struct cifs_tcon *
@@ -137,10 +134,7 @@
}
atomic_dec(&tconInfoAllocCount);
kfree(buf_to_free->nativeFileSystem);
- if (buf_to_free->password) {
- memset(buf_to_free->password, 0, strlen(buf_to_free->password));
- kfree(buf_to_free->password);
- }
+ kzfree(buf_to_free->password);
kfree(buf_to_free);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 69b610ad..94c4c19 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -585,8 +585,7 @@
}
/* check validate negotiate info response matches what we got earlier */
- if (pneg_rsp->Dialect !=
- cpu_to_le16(tcon->ses->server->vals->protocol_id))
+ if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect))
goto vneg_out;
if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode))
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
index 9f6607f..cb49698 100644
--- a/fs/crypto/Makefile
+++ b/fs/crypto/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
-fscrypto-y := crypto.o fname.o policy.o keyinfo.o
+fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o
fscrypto-$(CONFIG_BLOCK) += bio.o
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 151d489..732a786 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -126,21 +126,6 @@
}
EXPORT_SYMBOL(fscrypt_get_ctx);
-/**
- * page_crypt_complete() - completion callback for page crypto
- * @req: The asynchronous cipher request context
- * @res: The result of the cipher operation
- */
-static void page_crypt_complete(struct crypto_async_request *req, int res)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (res == -EINPROGRESS)
- return;
- ecr->res = res;
- complete(&ecr->completion);
-}
-
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
@@ -151,7 +136,7 @@
u8 padding[FS_IV_SIZE - sizeof(__le64)];
} iv;
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist dst, src;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
@@ -179,7 +164,7 @@
skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- page_crypt_complete, &ecr);
+ crypto_req_done, &wait);
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, len, offs);
@@ -187,14 +172,9 @@
sg_set_page(&src, src_page, len, offs);
skcipher_request_set_crypt(req, &src, &dst, len, &iv);
if (rw == FS_DECRYPT)
- res = crypto_skcipher_decrypt(req);
+ res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
else
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res) {
printk_ratelimited(KERN_ERR
@@ -340,7 +320,7 @@
return -ECHILD;
dir = dget_parent(dentry);
- if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) {
+ if (!IS_ENCRYPTED(d_inode(dir))) {
dput(dir);
return 0;
}
@@ -410,10 +390,7 @@
{
int i, res = -ENOMEM;
- /*
- * No need to allocate a bounce page pool if there already is one or
- * this FS won't use it.
- */
+ /* No need to allocate a bounce page pool if this FS won't use it. */
if (cop_flags & FS_CFLG_OWN_PAGES)
return 0;
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index ad9f814..6eb4343 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -15,21 +15,6 @@
#include "fscrypt_private.h"
/**
- * fname_crypt_complete() - completion callback for filename crypto
- * @req: The asynchronous cipher request context
- * @res: The result of the cipher operation
- */
-static void fname_crypt_complete(struct crypto_async_request *req, int res)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (res == -EINPROGRESS)
- return;
- ecr->res = res;
- complete(&ecr->completion);
-}
-
-/**
* fname_encrypt() - encrypt a filename
*
* The caller must have allocated sufficient memory for the @oname string.
@@ -40,7 +25,7 @@
const struct qstr *iname, struct fscrypt_str *oname)
{
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
@@ -76,17 +61,12 @@
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fname_crypt_complete, &ecr);
+ crypto_req_done, &wait);
sg_init_one(&sg, oname->name, cryptlen);
skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv);
/* Do the encryption */
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- /* Request is being completed asynchronously; wait for it */
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
@@ -110,7 +90,7 @@
struct fscrypt_str *oname)
{
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
struct fscrypt_info *ci = inode->i_crypt_info;
struct crypto_skcipher *tfm = ci->ci_ctfm;
@@ -131,7 +111,7 @@
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- fname_crypt_complete, &ecr);
+ crypto_req_done, &wait);
/* Initialize IV */
memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
@@ -140,11 +120,7 @@
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_skcipher_decrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
@@ -382,8 +358,7 @@
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
- if (!dir->i_sb->s_cop->is_encrypted(dir) ||
- fscrypt_is_dot_dotdot(iname)) {
+ if (!IS_ENCRYPTED(dir) || fscrypt_is_dot_dotdot(iname)) {
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index a1d5021..4688a64 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -11,7 +11,8 @@
#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H
-#include <linux/fscrypt_supp.h>
+#define __FS_HAS_ENCRYPTION 1
+#include <linux/fscrypt.h>
#include <crypto/hash.h>
/* Encryption parameters */
@@ -69,16 +70,6 @@
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
-struct fscrypt_completion_result {
- struct completion completion;
- int res;
-};
-
-#define DECLARE_FS_COMPLETION_RESULT(ecr) \
- struct fscrypt_completion_result ecr = { \
- COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 }
-
-
/* crypto.c */
extern int fscrypt_initialize(unsigned int cop_flags);
extern struct workqueue_struct *fscrypt_read_workqueue;
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
new file mode 100644
index 0000000..9f5fb2e
--- /dev/null
+++ b/fs/crypto/hooks.c
@@ -0,0 +1,112 @@
+/*
+ * fs/crypto/hooks.c
+ *
+ * Encryption hooks for higher-level filesystem operations.
+ */
+
+#include <linux/ratelimit.h>
+#include "fscrypt_private.h"
+
+/**
+ * fscrypt_file_open - prepare to open a possibly-encrypted regular file
+ * @inode: the inode being opened
+ * @filp: the struct file being set up
+ *
+ * Currently, an encrypted regular file can only be opened if its encryption key
+ * is available; access to the raw encrypted contents is not supported.
+ * Therefore, we first set up the inode's encryption key (if not already done)
+ * and return an error if it's unavailable.
+ *
+ * We also verify that if the parent directory (from the path via which the file
+ * is being opened) is encrypted, then the inode being opened uses the same
+ * encryption policy. This is needed as part of the enforcement that all files
+ * in an encrypted directory tree use the same encryption policy, as a
+ * protection against certain types of offline attacks. Note that this check is
+ * needed even when opening an *unencrypted* file, since it's forbidden to have
+ * an unencrypted file in an encrypted directory.
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ */
+int fscrypt_file_open(struct inode *inode, struct file *filp)
+{
+ int err;
+ struct dentry *dir;
+
+ err = fscrypt_require_key(inode);
+ if (err)
+ return err;
+
+ dir = dget_parent(file_dentry(filp));
+ if (IS_ENCRYPTED(d_inode(dir)) &&
+ !fscrypt_has_permitted_context(d_inode(dir), inode)) {
+ pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
+ d_inode(dir)->i_ino, inode->i_ino);
+ err = -EPERM;
+ }
+ dput(dir);
+ return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_file_open);
+
+int __fscrypt_prepare_link(struct inode *inode, struct inode *dir)
+{
+ int err;
+
+ err = fscrypt_require_key(dir);
+ if (err)
+ return err;
+
+ if (!fscrypt_has_permitted_context(dir, inode))
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_link);
+
+int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ int err;
+
+ err = fscrypt_require_key(old_dir);
+ if (err)
+ return err;
+
+ err = fscrypt_require_key(new_dir);
+ if (err)
+ return err;
+
+ if (old_dir != new_dir) {
+ if (IS_ENCRYPTED(new_dir) &&
+ !fscrypt_has_permitted_context(new_dir,
+ d_inode(old_dentry)))
+ return -EPERM;
+
+ if ((flags & RENAME_EXCHANGE) &&
+ IS_ENCRYPTED(old_dir) &&
+ !fscrypt_has_permitted_context(old_dir,
+ d_inode(new_dentry)))
+ return -EPERM;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename);
+
+int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry)
+{
+ int err = fscrypt_get_encryption_info(dir);
+
+ if (err)
+ return err;
+
+ if (fscrypt_has_encryption_key(dir)) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
+ spin_unlock(&dentry->d_lock);
+ }
+
+ d_set_d_op(dentry, &fscrypt_d_ops);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 8e704d1..c891d2e 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -17,17 +17,6 @@
static struct crypto_shash *essiv_hash_tfm;
-static void derive_crypt_complete(struct crypto_async_request *req, int rc)
-{
- struct fscrypt_completion_result *ecr = req->data;
-
- if (rc == -EINPROGRESS)
- return;
-
- ecr->res = rc;
- complete(&ecr->completion);
-}
-
/**
* derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
@@ -42,7 +31,7 @@
{
int res = 0;
struct skcipher_request *req = NULL;
- DECLARE_FS_COMPLETION_RESULT(ecr);
+ DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
@@ -59,7 +48,7 @@
}
skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- derive_crypt_complete, &ecr);
+ crypto_req_done, &wait);
res = crypto_skcipher_setkey(tfm, deriving_key,
FS_AES_128_ECB_KEY_SIZE);
if (res < 0)
@@ -69,11 +58,7 @@
sg_init_one(&dst_sg, derived_raw_key, source_key->size);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
NULL);
- res = crypto_skcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
+ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
out:
skcipher_request_free(req);
crypto_free_skcipher(tfm);
@@ -273,7 +258,7 @@
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode) ||
- inode->i_sb->s_cop->is_encrypted(inode))
+ IS_ENCRYPTED(inode))
return res;
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 9914d51..2f2c53f 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -109,7 +109,7 @@
struct fscrypt_policy policy;
int res;
- if (!inode->i_sb->s_cop->is_encrypted(inode))
+ if (!IS_ENCRYPTED(inode))
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
@@ -166,11 +166,11 @@
return 1;
/* No restrictions if the parent directory is unencrypted */
- if (!cops->is_encrypted(parent))
+ if (!IS_ENCRYPTED(parent))
return 1;
/* Encrypted directories must not contain unencrypted files */
- if (!cops->is_encrypted(child))
+ if (!IS_ENCRYPTED(child))
return 0;
/*
diff --git a/fs/dax.c b/fs/dax.c
index bf6218d..800748f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1265,6 +1265,17 @@
if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
return -EIO;
+ /*
+ * Write can allocate block for an area which has a hole page mapped
+ * into page tables. We have to tear down these mappings so that data
+ * written by write(2) is visible in mmap.
+ */
+ if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ (end - 1) >> PAGE_SHIFT);
+ }
+
while (pos < end) {
unsigned offset = pos & (PAGE_SIZE - 1);
struct blk_dax_ctl dax = { 0 };
@@ -1329,23 +1340,6 @@
if (iov_iter_rw(iter) == WRITE)
flags |= IOMAP_WRITE;
- /*
- * Yes, even DAX files can have page cache attached to them: A zeroed
- * page is inserted into the pagecache when we have to serve a write
- * fault on a hole. It should never be dirtied and can simply be
- * dropped from the pagecache once we get real data for the page.
- *
- * XXX: This is racy against mmap, and there's nothing we can do about
- * it. We'll eventually need to shift this down even further so that
- * we can check if we allocated blocks over a hole first.
- */
- if (mapping->nrpages) {
- ret = invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT,
- (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- }
-
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
iter, iomap_dax_actor);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index be2d9ae..c175391 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -32,17 +32,15 @@
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
#include <crypto/hash.h>
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-#include <linux/fscrypt_supp.h>
-#else
-#include <linux/fscrypt_notsupp.h>
-#endif
#include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
+#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION)
+#include <linux/fscrypt.h>
+
/*
* The fourth extended filesystem constants/structures
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a77cbc5..1a0c571 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4731,6 +4731,7 @@
EXT4_INODE_EOFBLOCKS);
}
ext4_mark_inode_dirty(handle, inode);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d887e32..74c49b5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4443,8 +4443,11 @@
new_fl |= S_DIRSYNC;
if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
new_fl |= S_DAX;
+ if (flags & EXT4_ENCRYPT_FL)
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
+ S_ENCRYPTED);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c30889b..8338cd4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1387,6 +1387,10 @@
"falling back\n"));
}
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+ if (!nblocks) {
+ ret = NULL;
+ goto cleanup_and_exit;
+ }
start = EXT4_I(dir)->i_dir_start_lookup;
if (start >= nblocks)
start = 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6c8f2bd..0440bef 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1130,7 +1130,8 @@
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
/*
- * Update inode->i_flags - e.g. S_DAX may get disabled
+ * Update inode->i_flags - S_ENCRYPTED will be enabled,
+ * S_DAX may be disabled
*/
ext4_set_inode_flags(inode);
}
@@ -1151,7 +1152,10 @@
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- /* Update inode->i_flags - e.g. S_DAX may get disabled */
+ /*
+ * Update inode->i_flags - S_ENCRYPTED will be enabled,
+ * S_DAX may be disabled
+ */
ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
@@ -1166,7 +1170,7 @@
return res;
}
-static int ext4_dummy_context(struct inode *inode)
+static bool ext4_dummy_context(struct inode *inode)
{
return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
}
@@ -1182,14 +1186,9 @@
.get_context = ext4_get_context,
.set_context = ext4_set_context,
.dummy_context = ext4_dummy_context,
- .is_encrypted = ext4_encrypted_inode,
.empty_dir = ext4_empty_dir,
.max_namelen = ext4_max_namelen,
};
-#else
-static const struct fscrypt_operations ext4_cryptops = {
- .is_encrypted = ext4_encrypted_inode,
-};
#endif
#ifdef CONFIG_QUOTA
@@ -3919,7 +3918,9 @@
sb->s_op = &ext4_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 341dbe5..d156dee 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,13 +24,11 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/quotaops.h>
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
-#include <linux/fscrypt_supp.h>
-#else
-#include <linux/fscrypt_notsupp.h>
-#endif
#include <crypto/hash.h>
+#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
+#include <linux/fscrypt.h>
+
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
#else
@@ -3096,6 +3094,7 @@
{
#ifdef CONFIG_F2FS_FS_ENCRYPTION
file_set_encrypt(inode);
+ inode->i_flags |= S_ENCRYPTED;
#endif
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9684d535..b4c4f2b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -43,8 +43,11 @@
new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
+ if (f2fs_encrypted_inode(inode))
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
+ S_ENCRYPTED);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f95ffde..8f364c5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1738,14 +1738,9 @@
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
- .is_encrypted = f2fs_encrypted_inode,
.empty_dir = f2fs_empty_dir,
.max_namelen = f2fs_max_namelen,
};
-#else
-static const struct fscrypt_operations f2fs_cryptops = {
- .is_encrypted = f2fs_encrypted_inode,
-};
#endif
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
@@ -2474,7 +2469,9 @@
#endif
sb->s_op = &f2fs_sops;
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
sb->s_cop = &f2fs_cryptops;
+#endif
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 1493ceb..ec03cf6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -114,6 +114,10 @@
int who = arg;
type = PIDTYPE_PID;
if (who < 0) {
+ /* avoid overflow below */
+ if (who == INT_MIN)
+ return;
+
type = PIDTYPE_PGID;
who = -who;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ffec69d..ad2e55d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -173,19 +173,33 @@
spin_unlock_bh(&wb->work_lock);
}
+static void finish_writeback_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct wb_completion *done = work->done;
+
+ if (work->auto_free)
+ kfree(work);
+ if (done && atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wb->bdi->wb_waitq);
+}
+
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);
- spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state))
- goto out_unlock;
if (work->done)
atomic_inc(&work->done->cnt);
- list_add_tail(&work->list, &wb->work_list);
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+ spin_lock_bh(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ } else
+ finish_writeback_work(wb, work);
+
spin_unlock_bh(&wb->work_lock);
}
@@ -1875,16 +1889,9 @@
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
- struct wb_completion *done = work->done;
-
trace_writeback_exec(wb, work);
-
wrote += wb_writeback(wb, work);
-
- if (work->auto_free)
- kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ finish_writeback_work(wb, work);
}
/*
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e23ff70..39c382f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -256,7 +256,7 @@
goto out;
}
if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
- if (flags & GFS2_DIF_JDATA)
+ if (new_flags & GFS2_DIF_JDATA)
gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH);
error = filemap_fdatawrite(inode->i_mapping);
if (error)
@@ -264,6 +264,8 @@
error = filemap_fdatawait(inode->i_mapping);
if (error)
goto out;
+ if (new_flags & GFS2_DIF_JDATA)
+ gfs2_ordered_del_inode(ip);
}
error = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (error)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 78219d5..d6512cd 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -275,7 +275,7 @@
{
struct kernfs_open_file *of = kernfs_of(file);
const struct kernfs_ops *ops;
- size_t len;
+ ssize_t len;
char *buf;
if (of->atomic_write_len) {
diff --git a/fs/libfs.c b/fs/libfs.c
index 48826d4..9588780a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -245,7 +245,8 @@
struct inode *root;
struct qstr d_name = QSTR_INIT(name, strlen(name));
- s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
+ s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER,
+ &init_user_ns, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d04ec381..1e5321d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1292,7 +1292,7 @@
return 0;
}
- error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ error = nfs_lookup_verify_inode(inode, flags);
dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
__func__, inode->i_ino, error ? "invalid" : "valid");
return !error;
@@ -1443,6 +1443,7 @@
const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs4_lookup_revalidate,
+ .d_weak_revalidate = nfs_weak_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -2097,7 +2098,7 @@
if (new_inode != NULL)
nfs_drop_nlink(new_inode);
d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
+ nfs_set_verifier(old_dentry,
nfs_save_change_attribute(new_dir));
} else if (error == -ENOENT)
nfs_dentry_handle_enoent(old_dentry);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bd81bcf..1ac1593 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -787,10 +787,8 @@
spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
- dreq->flags = 0;
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->error = hdr->error;
- }
if (dreq->error == 0) {
nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) {
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 1fc5d1c..d18ccc1 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -98,7 +98,7 @@
{
if (!test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
set_bit(NFS_INO_ODIRECT, &nfsi->flags);
- nfs_wb_all(inode);
+ nfs_sync_mapping(inode->i_mapping);
}
}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 074ac71..f6b0848 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1004,9 +1004,9 @@
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (server->rsize > server_resp_sz)
+ if (!server->rsize || server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (server->wsize > server_rqst_sz)
+ if (!server->wsize || server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 835c163..eaac878 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -567,9 +567,13 @@
struct idmap_msg *im;
struct idmap *idmap = (struct idmap *)aux;
struct key *key = cons->key;
- int ret = -ENOMEM;
+ int ret = -ENOKEY;
+
+ if (!aux)
+ goto out1;
/* msg and im are freed in idmap_pipe_destroy_msg */
+ ret = -ENOMEM;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6784522..4638654 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -38,7 +38,6 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/errno.h>
-#include <linux/file.h>
#include <linux/string.h>
#include <linux/ratelimit.h>
#include <linux/printk.h>
@@ -6006,7 +6005,6 @@
p->server = server;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
- get_file(fl->fl_file);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
out_free_seqid:
@@ -6119,7 +6117,6 @@
nfs_free_seqid(data->arg.lock_seqid);
nfs4_put_lock_state(data->lsp);
put_nfs_open_context(data->ctx);
- fput(data->fl.fl_file);
kfree(data);
dprintk("%s: done!\n", __func__);
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9267191..71deeae 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1718,7 +1718,6 @@
break;
case -NFS4ERR_STALE_CLIENTID:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
break;
case -NFS4ERR_EXPIRED:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b7a07ba..b8e4474 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2145,7 +2145,7 @@
nfs_pageio_reset_write_mds(desc);
mirror->pg_recoalesce = 1;
}
- hdr->release(hdr);
+ hdr->completion_ops->completion(hdr);
}
static enum pnfs_try_status
@@ -2256,7 +2256,7 @@
nfs_pageio_reset_read_mds(desc);
mirror->pg_recoalesce = 1;
}
- hdr->release(hdr);
+ hdr->completion_ops->completion(hdr);
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e4772a8..9a3b382 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1806,6 +1806,8 @@
set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next:
nfs_unlock_and_release_request(req);
+ /* Latency breaker */
+ cond_resched();
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
@@ -1859,6 +1861,8 @@
if (res)
error = nfs_generic_commit_list(inode, &head, how, &cinfo);
nfs_commit_end(cinfo.mds);
+ if (res == 0)
+ return res;
if (error < 0)
goto out_error;
if (!may_wait)
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index fd8c9a5..77d136a 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -30,7 +30,11 @@
struct list_head *grace_list = net_generic(net, grace_net_id);
spin_lock(&grace_lock);
- list_add(&lm->list, grace_list);
+ if (list_empty(&lm->list))
+ list_add(&lm->list, grace_list);
+ else
+ WARN(1, "double list_add attempt detected in net %x %s\n",
+ net->ns.inum, (net == &init_net) ? "(init_net)" : "");
spin_unlock(&grace_lock);
}
EXPORT_SYMBOL_GPL(locks_start_grace);
@@ -104,7 +108,9 @@
{
struct list_head *grace_list = net_generic(net, grace_net_id);
- BUG_ON(!list_empty(grace_list));
+ WARN_ONCE(!list_empty(grace_list),
+ "net %x %s: grace_list is not empty\n",
+ net->ns.inum, __func__);
}
static struct pernet_operations grace_net_ops = {
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 62469c6..81c018e 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,6 +60,9 @@
else
gi->gid[i] = rqgi->gid[i];
}
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
} else {
gi = get_group_info(rqgi);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ec2a69d..f463c4e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -63,12 +63,16 @@
static const stateid_t currentstateid = {
.si_generation = 1,
};
+static const stateid_t close_stateid = {
+ .si_generation = 0xffffffffU,
+};
static u64 current_sessionid = 1;
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t)))
+#define CLOSE_STATEID(stateid) (!memcmp((stateid), &close_stateid, sizeof(stateid_t)))
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
@@ -3513,7 +3517,9 @@
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
- if (local->st_stateowner == &oo->oo_owner) {
+ if (local->st_stateowner != &oo->oo_owner)
+ continue;
+ if (local->st_stid.sc_type == NFS4_OPEN_STID) {
ret = local;
atomic_inc(&ret->st_stid.sc_count);
break;
@@ -3522,6 +3528,52 @@
return ret;
}
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ switch (s->sc_type) {
+ default:
+ break;
+ case NFS4_CLOSED_STID:
+ case NFS4_CLOSED_DELEG_STID:
+ ret = nfserr_bad_stateid;
+ break;
+ case NFS4_REVOKED_DELEG_STID:
+ ret = nfserr_deleg_revoked;
+ }
+ return ret;
+}
+
+/* Lock the stateid st_mutex, and deal with races with CLOSE */
+static __be32
+nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
+{
+ __be32 ret;
+
+ mutex_lock(&stp->st_mutex);
+ ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret != nfs_ok)
+ mutex_unlock(&stp->st_mutex);
+ return ret;
+}
+
+static struct nfs4_ol_stateid *
+nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+{
+ struct nfs4_ol_stateid *stp;
+ for (;;) {
+ spin_lock(&fp->fi_lock);
+ stp = nfsd4_find_existing_open(fp, open);
+ spin_unlock(&fp->fi_lock);
+ if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
+ break;
+ nfs4_put_stid(&stp->st_stid);
+ }
+ return stp;
+}
+
static struct nfs4_openowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
struct nfsd4_compound_state *cstate)
@@ -3566,6 +3618,7 @@
mutex_init(&stp->st_mutex);
mutex_lock(&stp->st_mutex);
+retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
@@ -3590,7 +3643,11 @@
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
if (retstp) {
- mutex_lock(&retstp->st_mutex);
+ /* Handle races with CLOSE */
+ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
+ nfs4_put_stid(&retstp->st_stid);
+ goto retry;
+ }
/* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
stp = retstp;
@@ -4400,6 +4457,7 @@
struct nfs4_ol_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
+ bool new_stp = false;
/*
* Lookup file; if found, lookup stateid and check open request,
@@ -4411,9 +4469,7 @@
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
- spin_lock(&fp->fi_lock);
- stp = nfsd4_find_existing_open(fp, open);
- spin_unlock(&fp->fi_lock);
+ stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4421,35 +4477,31 @@
goto out;
}
+ if (!stp) {
+ stp = init_open_stateid(fp, open);
+ if (!open->op_stp)
+ new_stp = true;
+ }
+
/*
* OPEN the file, or upgrade an existing OPEN.
* If truncate fails, the OPEN fails.
+ *
+ * stp is already locked.
*/
- if (stp) {
+ if (!new_stp) {
/* Stateid was found, this is an OPEN upgrade */
- mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
- /* stp is returned locked. */
- stp = init_open_stateid(fp, open);
- /* See if we lost the race to some other thread */
- if (stp->st_access_bmap != 0) {
- status = nfs4_upgrade_open(rqstp, fp, current_fh,
- stp, open);
- if (status) {
- mutex_unlock(&stp->st_mutex);
- goto out;
- }
- goto upgrade_out;
- }
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- mutex_unlock(&stp->st_mutex);
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
@@ -4458,7 +4510,7 @@
if (stp->st_clnt_odstate == open->op_odstate)
open->op_odstate = NULL;
}
-upgrade_out:
+
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
@@ -4684,7 +4736,7 @@
spin_unlock(&nn->blocked_locks_lock);
while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
+ nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
posix_unblock_lock(&nbl->nbl_lock);
@@ -4818,7 +4870,8 @@
struct nfs4_stid *s;
__be32 status = nfserr_bad_stateid;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
+ CLOSE_STATEID(stateid))
return status;
/* Client debugging aid. */
if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
@@ -4876,7 +4929,8 @@
else if (typemask & NFS4_DELEG_STID)
typemask |= NFS4_REVOKED_DELEG_STID;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
+ CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
if (status == nfserr_stale_clientid) {
@@ -5127,15 +5181,9 @@
status = nfsd4_check_seqid(cstate, sop, seqid);
if (status)
return status;
- if (stp->st_stid.sc_type == NFS4_CLOSED_STID
- || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- /*
- * "Closed" stateid's exist *only* to return
- * nfserr_replay_me from the previous step, and
- * revoked delegations are kept only for free_stateid.
- */
- return nfserr_bad_stateid;
- mutex_lock(&stp->st_mutex);
+ status = nfsd4_lock_ol_stateid(stp);
+ if (status != nfs_ok)
+ return status;
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid);
@@ -5314,7 +5362,6 @@
bool unhashed;
LIST_HEAD(reaplist);
- s->st_stid.sc_type = NFS4_CLOSED_STID;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);
@@ -5353,10 +5400,17 @@
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
+
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
+ mutex_unlock(&stp->st_mutex);
+
+ /* See RFC5661 sectionm 18.2.4 */
+ if (stp->st_stid.sc_client->cl_minorversion)
+ memcpy(&close->cl_stateid, &close_stateid,
+ sizeof(close->cl_stateid));
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
@@ -6958,6 +7012,10 @@
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
+ nn->boot_time = get_seconds();
+ nn->grace_ended = false;
+ nn->nfsd4_manager.block_opens = true;
+ INIT_LIST_HEAD(&nn->nfsd4_manager.list);
INIT_LIST_HEAD(&nn->client_lru);
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
@@ -7015,9 +7073,6 @@
ret = nfs4_state_create_net(net);
if (ret)
return ret;
- nn->boot_time = get_seconds();
- nn->grace_ended = false;
- nn->nfsd4_manager.block_opens = true;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
@@ -7094,7 +7149,7 @@
spin_unlock(&nn->blocked_locks_lock);
while (!list_empty(&reaplist)) {
- nbl = list_first_entry(&nn->blocked_locks_lru,
+ nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
posix_unblock_lock(&nbl->nbl_lock);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1645b97..5c48006 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -155,7 +155,8 @@
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
- if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+ if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
+ change != NFSD_AVAIL)
return -1;
switch(change) {
case NFSD_SET:
@@ -399,23 +400,20 @@
void nfsd_reset_versions(void)
{
- int found_one = 0;
int i;
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
- if (nfsd_program.pg_vers[i])
- found_one = 1;
- }
+ for (i = 0; i < NFSD_NRVERS; i++)
+ if (nfsd_vers(i, NFSD_TEST))
+ return;
- if (!found_one) {
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
- nfsd_program.pg_vers[i] = nfsd_version[i];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
- nfsd_acl_program.pg_vers[i] =
- nfsd_acl_version[i];
-#endif
- }
+ for (i = 0; i < NFSD_NRVERS; i++)
+ if (i != 4)
+ nfsd_vers(i, NFSD_SET);
+ else {
+ int minor = 0;
+ while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+ minor++;
+ }
}
/*
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8718af8..80fdfad 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -90,6 +90,7 @@
return ERR_PTR(-ENOMEM);
}
d_instantiate(dentry, inode);
+ dentry->d_flags |= DCACHE_RCUACCESS;
dentry->d_fsdata = (void *)ns->ops;
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
if (d) {
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index fe2cbeb..939aa06 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -161,7 +161,7 @@
struct orangefs_kernel_op_s *op, *temp;
__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
- struct orangefs_kernel_op_s *cur_op = NULL;
+ struct orangefs_kernel_op_s *cur_op;
unsigned long ret;
/* We do not support blocking IO. */
@@ -181,6 +181,7 @@
}
restart:
+ cur_op = NULL;
/* Get next op (if any) from top of list. */
spin_lock(&orangefs_request_list_lock);
list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 02cc613..5b2cbe5 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -446,7 +446,7 @@
static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
- loff_t pos = *(&iocb->ki_pos);
+ loff_t pos = iocb->ki_pos;
ssize_t rc = 0;
BUG_ON(iocb->private);
@@ -485,9 +485,6 @@
}
}
- if (file->f_pos > i_size_read(file->f_mapping->host))
- orangefs_i_size_write(file->f_mapping->host, file->f_pos);
-
rc = generic_write_checks(iocb, iter);
if (rc <= 0) {
@@ -501,7 +498,7 @@
* pos to the end of the file, so we will wait till now to set
* pos...
*/
- pos = *(&iocb->ki_pos);
+ pos = iocb->ki_pos;
rc = do_readv_writev(ORANGEFS_IO_WRITE,
file,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 45dd8f2..f28381a 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -570,17 +570,6 @@
sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \
} while (0)
-static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
-{
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
- inode_lock(inode);
-#endif
- i_size_write(inode, i_size);
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
- inode_unlock(inode);
-#endif
-}
-
static inline void orangefs_set_timeout(struct dentry *dentry)
{
unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index abcfa3f..f61b008 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -28,10 +28,10 @@
*/
void purge_waiting_ops(void)
{
- struct orangefs_kernel_op_s *op;
+ struct orangefs_kernel_op_s *op, *tmp;
spin_lock(&orangefs_request_list_lock);
- list_for_each_entry(op, &orangefs_request_list, list) {
+ list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
gossip_debug(GOSSIP_WAIT_DEBUG,
"pvfs2-client-core: purging op tag %llu %s\n",
llu(op->tag),
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index f241b4e..a1be6ba 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -434,10 +434,14 @@
struct dentry *dentry = file->f_path.dentry;
struct file *realfile = od->realfile;
+ /* Nothing to sync for lower */
+ if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
+ return 0;
+
/*
* Need to check if we started out being a lower dir, but got copied up
*/
- if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
+ if (!od->is_upper) {
struct inode *inode = file_inode(file);
realfile = lockless_dereference(od->upperfile);
diff --git a/fs/pipe.c b/fs/pipe.c
index 8e0d9f2..3434553 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -609,12 +609,17 @@
static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{
- return pipe_user_pages_soft && user_bufs >= pipe_user_pages_soft;
+ return pipe_user_pages_soft && user_bufs > pipe_user_pages_soft;
}
static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{
- return pipe_user_pages_hard && user_bufs >= pipe_user_pages_hard;
+ return pipe_user_pages_hard && user_bufs > pipe_user_pages_hard;
+}
+
+static bool is_unprivileged_user(void)
+{
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}
struct pipe_inode_info *alloc_pipe_info(void)
@@ -633,12 +638,12 @@
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
- if (too_many_pipe_buffers_soft(user_bufs)) {
+ if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
pipe_bufs = 1;
}
- if (too_many_pipe_buffers_hard(user_bufs))
+ if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
goto out_revert_acct;
pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
@@ -1018,13 +1023,19 @@
/*
* Currently we rely on the pipe array holding a power-of-2 number
- * of pages.
+ * of pages. Returns 0 on error.
*/
static inline unsigned int round_pipe_size(unsigned int size)
{
unsigned long nr_pages;
+ if (size < pipe_min_size)
+ size = pipe_min_size;
+
nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (nr_pages == 0)
+ return 0;
+
return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
}
@@ -1040,6 +1051,8 @@
long ret = 0;
size = round_pipe_size(arg);
+ if (size == 0)
+ return -EINVAL;
nr_pages = size >> PAGE_SHIFT;
if (!nr_pages)
@@ -1061,7 +1074,7 @@
if (nr_pages > pipe->buffers &&
(too_many_pipe_buffers_hard(user_bufs) ||
too_many_pipe_buffers_soft(user_bufs)) &&
- !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ is_unprivileged_user()) {
ret = -EPERM;
goto out_revert_acct;
}
@@ -1123,13 +1136,18 @@
int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
size_t *lenp, loff_t *ppos)
{
+ unsigned int rounded_pipe_max_size;
int ret;
ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
if (ret < 0 || !write)
return ret;
- pipe_max_size = round_pipe_size(pipe_max_size);
+ rounded_pipe_max_size = round_pipe_size(pipe_max_size);
+ if (rounded_pipe_max_size == 0)
+ return -EINVAL;
+
+ pipe_max_size = rounded_pipe_max_size;
return ret;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c932ec4..794b52a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -423,8 +423,11 @@
* safe because the task has stopped executing permanently.
*/
if (permitted && (task->flags & PF_DUMPCORE)) {
- eip = KSTK_EIP(task);
- esp = KSTK_ESP(task);
+ if (try_get_task_stack(task)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ put_task_stack(task);
+ }
}
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 5c89a07..df7e079 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -507,23 +507,15 @@
return -EFAULT;
} else {
if (kern_addr_valid(start)) {
- unsigned long n;
-
/*
* Using bounce buffer to bypass the
* hardened user copy kernel text checks.
*/
- memcpy(buf, (char *) start, tsz);
- n = copy_to_user(buffer, buf, tsz);
- /*
- * We cannot distinguish between fault on source
- * and fault on destination. When this happens
- * we clear too and hope it will trigger the
- * EFAULT again.
- */
- if (n) {
- if (clear_user(buffer + tsz - n,
- n))
+ if (probe_kernel_read(buf, (void *) start, tsz)) {
+ if (clear_user(buffer, tsz))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
}
} else {
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15f327b..7340c36 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -14,6 +14,7 @@
#include <linux/tty.h>
#include <linux/seq_file.h>
#include <linux/bitops.h>
+#include "internal.h"
/*
* The /proc/tty directory inodes...
@@ -164,7 +165,7 @@
if (!ent)
return;
- remove_proc_entry(driver->driver_name, proc_tty_driver);
+ remove_proc_entry(ent->name, proc_tty_driver);
driver->proc_entry = NULL;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1bfac28..f9246ac 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2985,7 +2985,8 @@
pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"
" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order));
- register_shrinker(&dqcache_shrinker);
+ if (register_shrinker(&dqcache_shrinker))
+ panic("Cannot register dquot shrinker");
return 0;
}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index dc198bc..edc8ef7 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -513,9 +513,17 @@
"inode has negative prealloc blocks count.");
#endif
while (ei->i_prealloc_count > 0) {
- reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
- ei->i_prealloc_block++;
+ b_blocknr_t block_to_free;
+
+ /*
+ * reiserfs_free_prealloc_block can drop the write lock,
+ * which could allow another caller to free the same block.
+ * We can protect against it by modifying the prealloc
+ * state before calling it.
+ */
+ block_to_free = ei->i_prealloc_block++;
ei->i_prealloc_count--;
+ reiserfs_free_prealloc_block(th, inode, block_to_free);
dirty = 1;
}
if (dirty)
@@ -1128,7 +1136,7 @@
hint->prealloc_size = 0;
if (!hint->formatted_node && hint->preallocate) {
- if (S_ISREG(hint->inode->i_mode)
+ if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
&& hint->inode->i_size >=
REISERFS_SB(hint->th->t_super)->s_alloc_options.
preallocmin * hint->inode->i_sb->s_blocksize)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0a6ad4e..e101d70 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2521,7 +2521,6 @@
return err;
if (inode->i_size < off + len - towrite)
i_size_write(inode, off + len - towrite);
- inode->i_version++;
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
return len - towrite;
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index fffaad4..0b3b223 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c
@@ -32,23 +32,20 @@
ci->data->under_android = pi->data->under_android;
ci->data->under_cache = pi->data->under_cache;
ci->data->under_obb = pi->data->under_obb;
- set_top(ci, pi->top_data);
}
/* helper function for derived state */
void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
- uid_t uid, bool under_android,
- struct sdcardfs_inode_data *top)
+ uid_t uid)
{
struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
info->data->perm = perm;
info->data->userid = userid;
info->data->d_uid = uid;
- info->data->under_android = under_android;
+ info->data->under_android = false;
info->data->under_cache = false;
info->data->under_obb = false;
- set_top(info, top);
}
/* While renaming, there is a point where we want the path from dentry,
@@ -58,8 +55,8 @@
const struct qstr *name)
{
struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry));
- struct sdcardfs_inode_data *parent_data =
- SDCARDFS_I(d_inode(parent))->data;
+ struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
+ struct sdcardfs_inode_data *parent_data = parent_info->data;
appid_t appid;
unsigned long user_num;
int err;
@@ -80,13 +77,15 @@
inherit_derived_state(d_inode(parent), d_inode(dentry));
/* Files don't get special labels */
- if (!S_ISDIR(d_inode(dentry)->i_mode))
+ if (!S_ISDIR(d_inode(dentry)->i_mode)) {
+ set_top(info, parent_info);
return;
+ }
/* Derive custom permissions based on parent and current node */
switch (parent_data->perm) {
case PERM_INHERIT:
case PERM_ANDROID_PACKAGE_CACHE:
- /* Already inherited above */
+ set_top(info, parent_info);
break;
case PERM_PRE_ROOT:
/* Legacy internal layout places users at top level */
@@ -96,7 +95,6 @@
info->data->userid = 0;
else
info->data->userid = user_num;
- set_top(info, info->data);
break;
case PERM_ROOT:
/* Assume masked off by default. */
@@ -104,24 +102,24 @@
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID;
info->data->under_android = true;
- set_top(info, info->data);
+ } else {
+ set_top(info, parent_info);
}
break;
case PERM_ANDROID:
if (qstr_case_eq(name, &q_data)) {
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID_DATA;
- set_top(info, info->data);
} else if (qstr_case_eq(name, &q_obb)) {
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID_OBB;
info->data->under_obb = true;
- set_top(info, info->data);
/* Single OBB directory is always shared */
} else if (qstr_case_eq(name, &q_media)) {
/* App-specific directories inside; let anyone traverse */
info->data->perm = PERM_ANDROID_MEDIA;
- set_top(info, info->data);
+ } else {
+ set_top(info, parent_info);
}
break;
case PERM_ANDROID_OBB:
@@ -132,13 +130,13 @@
if (appid != 0 && !is_excluded(name->name, parent_data->userid))
info->data->d_uid =
multiuser_get_uid(parent_data->userid, appid);
- set_top(info, info->data);
break;
case PERM_ANDROID_PACKAGE:
if (qstr_case_eq(name, &q_cache)) {
info->data->perm = PERM_ANDROID_PACKAGE_CACHE;
info->data->under_cache = true;
}
+ set_top(info, parent_info);
break;
}
}
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 4a971e2..9cdb396 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -629,6 +629,8 @@
struct inode tmp;
struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode));
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
if (!top)
return -EINVAL;
@@ -645,7 +647,7 @@
*/
copy_attrs(&tmp, inode);
tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
- tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, inode->i_sb, top));
tmp.i_mode = (inode->i_mode & S_IFMT)
| get_mode(mnt, SDCARDFS_I(inode), top);
data_put(top);
@@ -724,7 +726,7 @@
*/
copy_attrs(&tmp, inode);
tmp.i_uid = make_kuid(&init_user_ns, top->d_uid);
- tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, dentry->d_sb, top));
tmp.i_mode = (inode->i_mode & S_IFMT)
| get_mode(mnt, SDCARDFS_I(inode), top);
tmp.i_size = i_size_read(inode);
@@ -821,11 +823,12 @@
return err;
}
-static int sdcardfs_fillattr(struct vfsmount *mnt,
- struct inode *inode, struct kstat *stat)
+static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode,
+ struct kstat *lower_stat, struct kstat *stat)
{
struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
struct sdcardfs_inode_data *top = top_data_get(info);
+ struct super_block *sb = inode->i_sb;
if (!top)
return -EINVAL;
@@ -835,14 +838,14 @@
stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top);
stat->nlink = inode->i_nlink;
stat->uid = make_kuid(&init_user_ns, top->d_uid);
- stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top));
+ stat->gid = make_kgid(&init_user_ns, get_gid(mnt, sb, top));
stat->rdev = inode->i_rdev;
- stat->size = i_size_read(inode);
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
- stat->blksize = (1 << inode->i_blkbits);
- stat->blocks = inode->i_blocks;
+ stat->size = lower_stat->size;
+ stat->atime = lower_stat->atime;
+ stat->mtime = lower_stat->mtime;
+ stat->ctime = lower_stat->ctime;
+ stat->blksize = lower_stat->blksize;
+ stat->blocks = lower_stat->blocks;
data_put(top);
return 0;
}
@@ -868,8 +871,7 @@
goto out;
sdcardfs_copy_and_fix_attrs(d_inode(dentry),
d_inode(lower_path.dentry));
- err = sdcardfs_fillattr(mnt, d_inode(dentry), stat);
- stat->blocks = lower_stat.blocks;
+ err = sdcardfs_fillattr(mnt, d_inode(dentry), &lower_stat, stat);
out:
sdcardfs_put_lower_path(dentry, &lower_path);
return err;
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index 0a2b516..e4fd3fb 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -33,6 +33,7 @@
Opt_userid,
Opt_reserved_mb,
Opt_gid_derivation,
+ Opt_default_normal,
Opt_err,
};
@@ -45,6 +46,7 @@
{Opt_userid, "userid=%d"},
{Opt_multiuser, "multiuser"},
{Opt_gid_derivation, "derive_gid"},
+ {Opt_default_normal, "default_normal"},
{Opt_reserved_mb, "reserved_mb=%u"},
{Opt_err, NULL}
};
@@ -68,6 +70,7 @@
opts->reserved_mb = 0;
/* by default, gid derivation is off */
opts->gid_derivation = false;
+ opts->default_normal = false;
*debug = 0;
@@ -122,6 +125,9 @@
case Opt_gid_derivation:
opts->gid_derivation = true;
break;
+ case Opt_default_normal:
+ opts->default_normal = true;
+ break;
/* unknown option */
default:
if (!silent)
@@ -175,6 +181,7 @@
return 0;
vfsopts->mask = option;
break;
+ case Opt_default_normal:
case Opt_multiuser:
case Opt_userid:
case Opt_fsuid:
@@ -334,13 +341,11 @@
mutex_lock(&sdcardfs_super_list_lock);
if (sb_info->options.multiuser) {
setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT,
- sb_info->options.fs_user_id, AID_ROOT,
- false, SDCARDFS_I(d_inode(sb->s_root))->data);
+ sb_info->options.fs_user_id, AID_ROOT);
snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
} else {
setup_derived_state(d_inode(sb->s_root), PERM_ROOT,
- sb_info->options.fs_user_id, AID_ROOT,
- false, SDCARDFS_I(d_inode(sb->s_root))->data);
+ sb_info->options.fs_user_id, AID_ROOT);
snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
}
fixup_tmp_permissions(d_inode(sb->s_root));
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index d1d8bab..610466a 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -201,6 +201,7 @@
struct sdcardfs_inode_data *data;
/* top folder for ownership */
+ spinlock_t top_lock;
struct sdcardfs_inode_data *top_data;
struct inode vfs_inode;
@@ -220,6 +221,7 @@
userid_t fs_user_id;
bool multiuser;
bool gid_derivation;
+ bool default_normal;
unsigned int reserved_mb;
};
@@ -379,7 +381,12 @@
static inline struct sdcardfs_inode_data *top_data_get(
struct sdcardfs_inode_info *info)
{
- return data_get(info->top_data);
+ struct sdcardfs_inode_data *top_data;
+
+ spin_lock(&info->top_lock);
+ top_data = data_get(info->top_data);
+ spin_unlock(&info->top_lock);
+ return top_data;
}
extern void data_release(struct kref *ref);
@@ -401,23 +408,30 @@
}
static inline void set_top(struct sdcardfs_inode_info *info,
- struct sdcardfs_inode_data *top)
+ struct sdcardfs_inode_info *top_owner)
{
- struct sdcardfs_inode_data *old_top = info->top_data;
+ struct sdcardfs_inode_data *old_top;
+ struct sdcardfs_inode_data *new_top = NULL;
- if (top)
- data_get(top);
- info->top_data = top;
+ if (top_owner)
+ new_top = top_data_get(top_owner);
+
+ spin_lock(&info->top_lock);
+ old_top = info->top_data;
+ info->top_data = new_top;
if (old_top)
data_put(old_top);
+ spin_unlock(&info->top_lock);
}
static inline int get_gid(struct vfsmount *mnt,
+ struct super_block *sb,
struct sdcardfs_inode_data *data)
{
- struct sdcardfs_vfsmount_options *opts = mnt->data;
+ struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
+ struct sdcardfs_sb_info *sbi = SDCARDFS_SB(sb);
- if (opts->gid == AID_SDCARD_RW)
+ if (vfsopts->gid == AID_SDCARD_RW && !sbi->options.default_normal)
/* As an optimization, certain trusted system components only run
* as owner but operate across all users. Since we're now handing
* out the sdcard_rw GID only to trusted apps, we're okay relaxing
@@ -426,7 +440,7 @@
*/
return AID_SDCARD_RW;
else
- return multiuser_get_uid(data->userid, opts->gid);
+ return multiuser_get_uid(data->userid, vfsopts->gid);
}
static inline int get_mode(struct vfsmount *mnt,
@@ -513,8 +527,7 @@
};
extern void setup_derived_state(struct inode *inode, perm_t perm,
- userid_t userid, uid_t uid, bool under_android,
- struct sdcardfs_inode_data *top);
+ userid_t userid, uid_t uid);
extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name);
extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit);
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index b89947d..cffcdb1 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -215,6 +215,9 @@
i->data = d;
kref_init(&d->refcount);
+ i->top_data = d;
+ spin_lock_init(&i->top_lock);
+ kref_get(&d->refcount);
i->vfs_inode.i_version = 1;
return &i->vfs_inode;
@@ -304,6 +307,8 @@
seq_printf(m, ",userid=%u", opts->fs_user_id);
if (opts->gid_derivation)
seq_puts(m, ",derive_gid");
+ if (opts->default_normal)
+ seq_puts(m, ",default_normal");
if (opts->reserved_mb != 0)
seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index d9f9615..3979d76 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -270,7 +270,8 @@
}
static int __ubifs_setxattr(struct inode *host, const char *name,
- const void *value, size_t size, int flags)
+ const void *value, size_t size, int flags,
+ bool check_lock)
{
struct inode *inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
@@ -279,7 +280,8 @@
union ubifs_key key;
int err;
- ubifs_assert(inode_is_locked(host));
+ if (check_lock)
+ ubifs_assert(inode_is_locked(host));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -548,7 +550,8 @@
}
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
- err = __ubifs_setxattr(inode, name, xattr->value, xattr->value_len, 0);
+ err = __ubifs_setxattr(inode, name, xattr->value,
+ xattr->value_len, 0, false);
kfree(name);
if (err < 0)
break;
@@ -594,7 +597,8 @@
name = xattr_full_name(handler, name);
if (value)
- return __ubifs_setxattr(inode, name, value, size, flags);
+ return __ubifs_setxattr(inode, name, value, size, flags,
+ true);
else
return __ubifs_removexattr(inode, name);
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4942549..4b1f6d5 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -710,7 +710,7 @@
else
sectorsize = sb->s_blocksize;
- sector += (sbi->s_session << sb->s_blocksize_bits);
+ sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits);
udf_debug("Starting at sector %u (%ld byte sectors)\n",
(unsigned int)(sector >> sb->s_blocksize_bits),
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 2cde073..9d9c032 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -419,7 +419,7 @@
* in such case.
*/
down_read(&mm->mmap_sem);
- ret = 0;
+ ret = VM_FAULT_NOPAGE;
}
}
diff --git a/fs/xattr.c b/fs/xattr.c
index 932b906..1b00bab 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -130,7 +130,7 @@
return -EPERM;
}
- return inode_permission(inode, mask);
+ return inode_permission2(ERR_PTR(-EOPNOTSUPP), inode, mask);
}
int
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7eb9970..8ad65d4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2713,7 +2713,7 @@
&i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ cur->bc_rec.b.br_state = new->br_state;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index d31cd1e..f3acecf 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -391,7 +391,7 @@
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1295,7 +1295,7 @@
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
- offset + mapping_size >= i_size_read(inode)) {
+ (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
@@ -1347,7 +1347,7 @@
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + size > mp->m_super->s_maxbytes)
+ if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index eca7bae..3f45d98 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1785,22 +1785,27 @@
btp->bt_bdi = blk_get_backing_dev_info(bdev);
if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
+ goto error_free;
if (list_lru_init(&btp->bt_lru))
- goto error;
+ goto error_free;
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
- goto error;
+ goto error_lru;
btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&btp->bt_shrinker);
+ if (register_shrinker(&btp->bt_shrinker))
+ goto error_pcpu;
return btp;
-error:
+error_pcpu:
+ percpu_counter_destroy(&btp->bt_io_count);
+error_lru:
+ list_lru_destroy(&btp->bt_lru);
+error_free:
kmem_free(btp);
return NULL;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9d06cc3..7a7b3cc 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1004,14 +1004,22 @@
* holding the lock before removing the dquot from the AIL.
*/
if ((lip->li_flags & XFS_LI_IN_AIL) &&
- lip->li_lsn == qip->qli_flush_lsn) {
+ ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == qip->qli_flush_lsn)
+ if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
- else
+ } else {
+ /*
+ * Clear the failed state since we are about to drop the
+ * flush lock
+ */
+ if (lip->li_flags & XFS_LI_FAILED)
+ xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
+ }
}
/*
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 2c7a162..664dea1 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -137,6 +137,26 @@
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * have been failed during writeback
+ *
+ * this informs the AIL that the dquot is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_dquot_item_error(
+ struct xfs_log_item *lip,
+ struct xfs_buf *bp)
+{
+ struct xfs_dquot *dqp;
+
+ dqp = DQUOT_ITEM(lip)->qli_dquot;
+ ASSERT(!completion_done(&dqp->q_flush));
+ xfs_set_li_failed(lip, bp);
+}
+
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
@@ -144,13 +164,28 @@
__acquires(&lip->li_ailp->xa_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = NULL;
+ struct xfs_buf *bp = lip->li_buf;
uint rval = XFS_ITEM_SUCCESS;
int error;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
+ /*
+ * The buffer containing this item failed to be written back
+ * previously. Resubmit the buffer for IO
+ */
+ if (lip->li_flags & XFS_LI_FAILED) {
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+
+ xfs_buf_unlock(bp);
+ return rval;
+ }
+
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
@@ -242,7 +277,8 @@
.iop_unlock = xfs_qm_dquot_logitem_unlock,
.iop_committed = xfs_qm_dquot_logitem_committed,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing
+ .iop_committing = xfs_qm_dquot_logitem_committing,
+ .iop_error = xfs_dquot_item_error
};
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fe9a9a1..c5f2f1e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2386,6 +2386,7 @@
*/
if (ip->i_ino != inum + i) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ rcu_read_unlock();
continue;
}
}
@@ -2429,6 +2430,24 @@
}
/*
+ * Free any local-format buffers sitting around before we reset to
+ * extents format.
+ */
+static inline void
+xfs_ifree_local_data(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_ifork *ifp;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ return;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+}
+
+/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2465,6 +2484,9 @@
if (error)
return error;
+ xfs_ifree_local_data(ip, XFS_DATA_FORK);
+ xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 5b81f7f..33c3899 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -871,22 +871,6 @@
return error;
/*
- * We are going to log the inode size change in this transaction so
- * any previous writes that are beyond the on disk EOF and the new
- * EOF that have not been written out need to be written here. If we
- * do not write the data out, we expose ourselves to the null files
- * problem. Note that this includes any block zeroing we did above;
- * otherwise those blocks may not be zeroed after a crash.
- */
- if (did_zeroing ||
- (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
- error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
- ip->i_d.di_size, newsize);
- if (error)
- return error;
- }
-
- /*
* We've already locked out new page faults, so now we can safely remove
* pages from the page cache knowing they won't get refaulted until we
* drop the XFS_MMAP_EXCL lock after the extent manipulations are
@@ -902,9 +886,29 @@
* user visible changes). There's not much we can do about this, except
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
+ *
+ * And we update in-core i_size and truncate page cache beyond newsize
+ * before writeback the [di_size, newsize] range, so we're guaranteed
+ * not to write stale data past the new EOF on truncate down.
*/
truncate_setsize(inode, newsize);
+ /*
+ * We are going to log the inode size change in this transaction so
+ * any previous writes that are beyond the on disk EOF and the new
+ * EOF that have not been written out need to be written here. If we
+ * do not write the data out, we expose ourselves to the null files
+ * problem. Note that this includes any block zeroing we did above;
+ * otherwise those blocks may not be zeroed after a crash.
+ */
+ if (did_zeroing ||
+ (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
+ error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ ip->i_d.di_size, newsize - 1);
+ if (error)
+ return error;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error)
return error;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0590926..1e26f45 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -753,7 +753,7 @@
* in the in-core log. The following number can be made tighter if
* we actually look at the block size of the filesystem.
*/
- num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+ num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
if (head_blk >= num_scan_bblks) {
/*
* We are guaranteed that the entire check can be performed
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 4df64a1..e02a3d9 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -27,6 +27,8 @@
* __kprobes_text_start, __kprobes_text_end
* __entry_text_start, __entry_text_end
* __ctors_start, __ctors_end
+ * __irqentry_text_start, __irqentry_text_end
+ * __softirqentry_text_start, __softirqentry_text_end
*/
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
@@ -39,6 +41,8 @@
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __entry_text_start[], __entry_text_end[];
extern char __start_rodata[], __end_rodata[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __softirqentry_text_start[], __softirqentry_text_end[];
/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index dc81e52..3a396a9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -499,25 +499,17 @@
*(.entry.text) \
VMLINUX_SYMBOL(__entry_text_end) = .;
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__irqentry_text_start) = .; \
*(.irqentry.text) \
VMLINUX_SYMBOL(__irqentry_text_end) = .;
-#else
-#define IRQENTRY_TEXT
-#endif
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
#define SOFTIRQENTRY_TEXT \
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__softirqentry_text_start) = .; \
*(.softirqentry.text) \
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
-#else
-#define SOFTIRQENTRY_TEXT
-#endif
/* Section used for early init (in .S files) */
#define HEAD_TEXT *(.head.text)
@@ -778,7 +770,14 @@
*/
#define PERCPU_INPUT(cacheline) \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
*(.data..percpu..first) \
+ . = ALIGN(cacheline); \
+ *(.data..percpu..user_mapped) \
+ *(.data..percpu..user_mapped..shared_aligned) \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..user_mapped..page_aligned) \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index f6d9af3e..5203560 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -80,6 +80,16 @@
struct ahash_instance *inst);
void ahash_free_instance(struct crypto_instance *inst);
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+ return alg->setkey != shash_no_setkey;
+}
+
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg);
+
int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
struct hash_alg_common *alg,
struct crypto_instance *inst);
diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h
index 4a53c0d..e045722 100644
--- a/include/crypto/mcryptd.h
+++ b/include/crypto/mcryptd.h
@@ -26,6 +26,7 @@
struct mcryptd_cpu_queue {
struct crypto_queue queue;
+ spinlock_t q_lock;
struct work_struct work;
};
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 894df59..d586f74 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -30,8 +30,6 @@
};
int crypto_poly1305_init(struct shash_desc *desc);
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen);
unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen);
int crypto_poly1305_update(struct shash_desc *desc,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 61a3d90..ca2b4c4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -276,11 +276,8 @@
/* Arch dependent functions for cpu hotplug support */
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
int acpi_unmap_cpu(int cpu);
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
-void acpi_set_processor_mapping(void);
-
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e47a7f7..fe8dd27 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1738,43 +1738,26 @@
#define BLK_IO_LAT_HIST_ZERO 2
struct io_latency_state {
- u_int64_t latency_y_axis_read[ARRAY_SIZE(latency_x_axis_us) + 1];
- u_int64_t latency_reads_elems;
- u_int64_t latency_y_axis_write[ARRAY_SIZE(latency_x_axis_us) + 1];
- u_int64_t latency_writes_elems;
+ u_int64_t latency_y_axis[ARRAY_SIZE(latency_x_axis_us) + 1];
+ u_int64_t latency_elems;
+ u_int64_t latency_sum;
};
static inline void
-blk_update_latency_hist(struct io_latency_state *s,
- int read,
- u_int64_t delta_us)
+blk_update_latency_hist(struct io_latency_state *s, u_int64_t delta_us)
{
int i;
- for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++) {
- if (delta_us < (u_int64_t)latency_x_axis_us[i]) {
- if (read)
- s->latency_y_axis_read[i]++;
- else
- s->latency_y_axis_write[i]++;
+ for (i = 0; i < ARRAY_SIZE(latency_x_axis_us); i++)
+ if (delta_us < (u_int64_t)latency_x_axis_us[i])
break;
- }
- }
- if (i == ARRAY_SIZE(latency_x_axis_us)) {
- /* Overflowed the histogram */
- if (read)
- s->latency_y_axis_read[i]++;
- else
- s->latency_y_axis_write[i]++;
- }
- if (read)
- s->latency_reads_elems++;
- else
- s->latency_writes_elems++;
+ s->latency_y_axis[i]++;
+ s->latency_elems++;
+ s->latency_sum += delta_us;
}
-void blk_zero_latency_hist(struct io_latency_state *s);
-ssize_t blk_latency_hist_show(struct io_latency_state *s, char *buf);
+ssize_t blk_latency_hist_show(char* name, struct io_latency_state *s,
+ char *buf, int buf_size);
#else /* CONFIG_BLOCK */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 97b3c2a..193c40e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,17 +36,26 @@
};
struct bpf_map {
- atomic_t refcnt;
+ /* 1st cacheline with read-mostly members of which some
+ * are also accessed in fast-path (e.g. ops, max_entries).
+ */
+ const struct bpf_map_ops *ops ____cacheline_aligned;
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
u32 max_entries;
u32 map_flags;
u32 pages;
- struct user_struct *user;
- const struct bpf_map_ops *ops;
- struct work_struct work;
+ bool unpriv_array;
+ /* 7 bytes hole */
+
+ /* 2nd cacheline with misc members to avoid false sharing
+ * particularly with refcounting.
+ */
+ struct user_struct *user ____cacheline_aligned;
+ atomic_t refcnt;
atomic_t usercnt;
+ struct work_struct work;
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -195,6 +204,7 @@
struct bpf_array {
struct bpf_map map;
u32 elem_size;
+ u32 index_mask;
/* 'ownership' of prog_array is claimed by the first program that
* is going to use this map or by the first program which FD is stored
* in the map to make sure that all callers and callees have the same
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3101141..070fc49 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -67,7 +67,11 @@
};
struct bpf_insn_aux_data {
- enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+ union {
+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ };
+ bool seen; /* this insn was processed by the verifier */
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 447a915..4431ea2 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -239,12 +239,10 @@
{
if (err == 0)
return VM_FAULT_LOCKED;
- if (err == -EFAULT)
+ if (err == -EFAULT || err == -EAGAIN)
return VM_FAULT_NOPAGE;
if (err == -ENOMEM)
return VM_FAULT_OOM;
- if (err == -EAGAIN)
- return VM_FAULT_RETRY;
/* -ENOSPC, -EDQUOT, -EIO ... */
return VM_FAULT_SIGBUS;
}
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2189935..a951fd1 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -71,6 +71,7 @@
struct cacheinfo *info_list;
unsigned int num_levels;
unsigned int num_leaves;
+ bool cpu_map_populated;
};
/*
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index dfd2b7c..b86c3fb 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -44,6 +44,13 @@
extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
+extern ssize_t cpu_show_meltdown(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
const struct attribute_group **groups,
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 965cc56..c9447a6 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -48,7 +48,7 @@
CPUHP_ARM_SHMOBILE_SCU_PREPARE,
CPUHP_SH_SH3X_PREPARE,
CPUHP_BLK_MQ_PREPARE,
- CPUHP_TIMERS_DEAD,
+ CPUHP_TIMERS_PREPARE,
CPUHP_NOTF_ERR_INJ_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
CPUHP_BRINGUP_CPU,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index f0e70a1..cf1a5d0 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -82,6 +82,7 @@
extern void set_groups(struct cred *, struct group_info *);
extern int groups_search(const struct group_info *, kgid_t);
extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
/*
* The security context of a task
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7cee555..0a72625 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <linux/completion.h>
/*
* Autoloaded crypto modules should only use a prefixed name to avoid allowing
@@ -465,6 +466,45 @@
} CRYPTO_MINALIGN_ATTR;
/*
+ * A helper struct for waiting for completion of async crypto ops
+ */
+struct crypto_wait {
+ struct completion completion;
+ int err;
+};
+
+/*
+ * Macro for declaring a crypto op async wait object on stack
+ */
+#define DECLARE_CRYPTO_WAIT(_wait) \
+ struct crypto_wait _wait = { \
+ COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 }
+
+/*
+ * Async ops completion helper functioons
+ */
+void crypto_req_done(struct crypto_async_request *req, int err);
+
+static inline int crypto_wait_req(int err, struct crypto_wait *wait)
+{
+ switch (err) {
+ case -EINPROGRESS:
+ case -EBUSY:
+ wait_for_completion(&wait->completion);
+ reinit_completion(&wait->completion);
+ err = wait->err;
+ break;
+ };
+
+ return err;
+}
+
+static inline void crypto_init_wait(struct crypto_wait *wait)
+{
+ init_completion(&wait->completion);
+}
+
+/*
* Algorithm registration interface.
*/
int crypto_register_alg(struct crypto_alg *alg);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 08528af..704caae 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -659,7 +659,6 @@
return ret;
}
-#ifdef CONFIG_HAS_DMA
static inline int dma_get_cache_alignment(void)
{
#ifdef ARCH_DMA_MINALIGN
@@ -667,7 +666,6 @@
#endif
return 1;
}
-#endif
/* flags for the coherent memory api */
#define DMA_MEMORY_MAP 0x01
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 6e84b2cae..442b54a 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -9,6 +9,7 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/nospec.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>
@@ -81,8 +82,10 @@
{
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
- if (fd < fdt->max_fds)
+ if (fd < fdt->max_fds) {
+ fd = array_index_nospec(fd, fdt->max_fds);
return rcu_dereference_raw(fdt->fd[fd]);
+ }
return NULL;
}
diff --git a/include/linux/fence.h b/include/linux/fence.h
index fd9b89f..7c9b78c 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -47,7 +47,7 @@
* can be compared to decide which fence would be signaled later.
* @flags: A mask of FENCE_FLAG_* defined below
* @timestamp: Timestamp when the fence was signaled.
- * @status: Optional, only valid if < 0, must be set before calling
+ * @error: Optional, only valid if < 0, must be set before calling
* fence_signal, indicates that the fence has completed with an error.
*
* the flags member must be manipulated and read using the appropriate
@@ -79,7 +79,7 @@
unsigned seqno;
unsigned long flags;
ktime_t timestamp;
- int status;
+ int error;
};
enum fence_flag_bits {
@@ -133,7 +133,7 @@
* or some failure occurred that made it impossible to enable
* signaling. True indicates successful enabling.
*
- * fence->status may be set in enable_signaling, but only when false is
+ * fence->error may be set in enable_signaling, but only when false is
* returned.
*
* Calling fence_signal before enable_signaling is called allows
@@ -145,7 +145,7 @@
* the second time will be a noop since it was already signaled.
*
* Notes on signaled:
- * May set fence->status if returning true.
+ * May set fence->error if returning true.
*
* Notes on wait:
* Must not be NULL, set to fence_default_wait for default implementation.
@@ -329,6 +329,19 @@
}
/**
+ * __fence_is_later - return if f1 is chronologically later than f2
+ * @f1: [in] the first fence's seqno
+ * @f2: [in] the second fence's seqno from the same context
+ *
+ * Returns true if f1 is chronologically later than f2. Both fences must be
+ * from the same context, since a seqno is not common across contexts.
+ */
+static inline bool __fence_is_later(u32 f1, u32 f2)
+{
+ return (int)(f1 - f2) > 0;
+}
+
+/**
* fence_is_later - return if f1 is chronologically later than f2
* @f1: [in] the first fence from the same context
* @f2: [in] the second fence from the same context
@@ -341,7 +354,7 @@
if (WARN_ON(f1->context != f2->context))
return false;
- return (int)(f1->seqno - f2->seqno) > 0;
+ return __fence_is_later(f1->seqno, f2->seqno);
}
/**
@@ -369,6 +382,50 @@
return fence_is_signaled(f2) ? NULL : f2;
}
+/**
+ * fence_get_status_locked - returns the status upon completion
+ * @fence: [in] the fence to query
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence (to indicate whether the fence was completed due to an error
+ * rather than success). The value of the status condition is only valid
+ * if the fence has been signaled, fence_get_status_locked() first checks
+ * the signal state before reporting the error status.
+ *
+ * Returns 0 if the fence has not yet been signaled, 1 if the fence has
+ * been signaled without an error condition, or a negative error code
+ * if the fence has been completed in err.
+ */
+static inline int fence_get_status_locked(struct fence *fence)
+{
+ if (fence_is_signaled_locked(fence))
+ return fence->error ?: 1;
+ else
+ return 0;
+}
+
+int fence_get_status(struct fence *fence);
+
+/**
+ * fence_set_error - flag an error condition on the fence
+ * @fence: [in] the fence
+ * @error: [in] the error to store
+ *
+ * Drivers can supply an optional error status condition before they signal
+ * the fence, to indicate that the fence was completed due to an error
+ * rather than success. This must be set before signaling (so that the value
+ * is visible before any waiters on the signal callback are woken). This
+ * helper exists to help catching erroneous setting of #fence.error.
+ */
+static inline void fence_set_error(struct fence *fence,
+ int error)
+{
+ BUG_ON(test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+ BUG_ON(error >= 0 || error < -MAX_ERRNO);
+
+ fence->error = error;
+}
+
signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);
signed long fence_wait_any_timeout(struct fence **fences, uint32_t count,
bool intr, signed long timeout);
diff --git a/include/linux/frame.h b/include/linux/frame.h
index e6baaba..d772c61 100644
--- a/include/linux/frame.h
+++ b/include/linux/frame.h
@@ -11,7 +11,7 @@
* For more information, see tools/objtool/Documentation/stack-validation.txt.
*/
#define STACK_FRAME_NON_STANDARD(func) \
- static void __used __section(__func_stack_frame_non_standard) \
+ static void __used __section(.discard.func_stack_frame_non_standard) \
*__func_stack_frame_non_standard_##func = func
#else /* !CONFIG_STACK_VALIDATION */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 18bd249..cbcdcb2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1857,6 +1857,7 @@
#else
#define S_DAX 0 /* Make all the DAX code disappear */
#endif
+#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1895,6 +1896,7 @@
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
+#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 115bb81..94a8aae 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -764,7 +764,7 @@
{
if (fscache_cookie_valid(cookie) && PageFsCache(page))
return __fscache_maybe_release_page(cookie, page, gfp);
- return false;
+ return true;
}
/**
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
new file mode 100644
index 0000000..8641e56
--- /dev/null
+++ b/include/linux/fscrypt.h
@@ -0,0 +1,290 @@
+/*
+ * fscrypt.h: declarations for per-file encryption
+ *
+ * Filesystems that implement per-file encryption include this header
+ * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem
+ * is being built with encryption support or not.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#ifndef _LINUX_FSCRYPT_H
+#define _LINUX_FSCRYPT_H
+
+#include <linux/key.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/dcache.h>
+#include <crypto/skcipher.h>
+#include <uapi/linux/fs.h>
+
+#define FS_CRYPTO_BLOCK_SIZE 16
+
+struct fscrypt_info;
+
+struct fscrypt_ctx {
+ union {
+ struct {
+ struct page *bounce_page; /* Ciphertext page */
+ struct page *control_page; /* Original page */
+ } w;
+ struct {
+ struct bio *bio;
+ struct work_struct work;
+ } r;
+ struct list_head free_list; /* Free list */
+ };
+ u8 flags; /* Flags */
+};
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct fscrypt_symlink_data {
+ __le16 len;
+ char encrypted_path[1];
+} __packed;
+
+struct fscrypt_str {
+ unsigned char *name;
+ u32 len;
+};
+
+struct fscrypt_name {
+ const struct qstr *usr_fname;
+ struct fscrypt_str disk_name;
+ u32 hash;
+ u32 minor_hash;
+ struct fscrypt_str crypto_buf;
+};
+
+#define FSTR_INIT(n, l) { .name = n, .len = l }
+#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
+#define fname_name(p) ((p)->disk_name.name)
+#define fname_len(p) ((p)->disk_name.len)
+
+/*
+ * fscrypt superblock flags
+ */
+#define FS_CFLG_OWN_PAGES (1U << 1)
+
+/*
+ * crypto opertions for filesystems
+ */
+struct fscrypt_operations {
+ unsigned int flags;
+ const char *key_prefix;
+ int (*get_context)(struct inode *, void *, size_t);
+ int (*set_context)(struct inode *, const void *, size_t, void *);
+ bool (*dummy_context)(struct inode *);
+ bool (*empty_dir)(struct inode *);
+ unsigned (*max_namelen)(struct inode *);
+};
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+ if (inode->i_sb->s_cop->dummy_context &&
+ inode->i_sb->s_cop->dummy_context(inode))
+ return true;
+ return false;
+}
+
+static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
+ u32 filenames_mode)
+{
+ if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
+ filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
+ return true;
+
+ if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
+ filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
+ return true;
+
+ return false;
+}
+
+static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
+{
+ if (str->len == 1 && str->name[0] == '.')
+ return true;
+
+ if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
+ return true;
+
+ return false;
+}
+
+#if __FS_HAS_ENCRYPTION
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+ return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+}
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+ return (inode->i_crypt_info != NULL);
+}
+
+#include <linux/fscrypt_supp.h>
+
+#else /* !__FS_HAS_ENCRYPTION */
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+}
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+ return 0;
+}
+
+#include <linux/fscrypt_notsupp.h>
+#endif /* __FS_HAS_ENCRYPTION */
+
+/**
+ * fscrypt_require_key - require an inode's encryption key
+ * @inode: the inode we need the key for
+ *
+ * If the inode is encrypted, set up its encryption key if not already done.
+ * Then require that the key be present and return -ENOKEY otherwise.
+ *
+ * No locks are needed, and the key will live as long as the struct inode --- so
+ * it won't go away from under you.
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ * if a problem occurred while setting up the encryption key.
+ */
+static inline int fscrypt_require_key(struct inode *inode)
+{
+ if (IS_ENCRYPTED(inode)) {
+ int err = fscrypt_get_encryption_info(inode);
+
+ if (err)
+ return err;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory
+ * @old_dentry: an existing dentry for the inode being linked
+ * @dir: the target directory
+ * @dentry: negative dentry for the target filename
+ *
+ * A new link can only be added to an encrypted directory if the directory's
+ * encryption key is available --- since otherwise we'd have no way to encrypt
+ * the filename. Therefore, we first set up the directory's encryption key (if
+ * not already done) and return an error if it's unavailable.
+ *
+ * We also verify that the link will not violate the constraint that all files
+ * in an encrypted directory tree use the same encryption policy.
+ *
+ * Return: 0 on success, -ENOKEY if the directory's encryption key is missing,
+ * -EPERM if the link would result in an inconsistent encryption policy, or
+ * another -errno code.
+ */
+static inline int fscrypt_prepare_link(struct dentry *old_dentry,
+ struct inode *dir,
+ struct dentry *dentry)
+{
+ if (IS_ENCRYPTED(dir))
+ return __fscrypt_prepare_link(d_inode(old_dentry), dir);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories
+ * @old_dir: source directory
+ * @old_dentry: dentry for source file
+ * @new_dir: target directory
+ * @new_dentry: dentry for target location (may be negative unless exchanging)
+ * @flags: rename flags (we care at least about %RENAME_EXCHANGE)
+ *
+ * Prepare for ->rename() where the source and/or target directories may be
+ * encrypted. A new link can only be added to an encrypted directory if the
+ * directory's encryption key is available --- since otherwise we'd have no way
+ * to encrypt the filename. A rename to an existing name, on the other hand,
+ * *is* cryptographically possible without the key. However, we take the more
+ * conservative approach and just forbid all no-key renames.
+ *
+ * We also verify that the rename will not violate the constraint that all files
+ * in an encrypted directory tree use the same encryption policy.
+ *
+ * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the
+ * rename would cause inconsistent encryption policies, or another -errno code.
+ */
+static inline int fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir))
+ return __fscrypt_prepare_rename(old_dir, old_dentry,
+ new_dir, new_dentry, flags);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory
+ * @dir: directory being searched
+ * @dentry: filename being looked up
+ * @flags: lookup flags
+ *
+ * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be
+ * done with or without the directory's encryption key; without the key,
+ * filenames are presented in encrypted form. Therefore, we'll try to set up
+ * the directory's encryption key, but even without it the lookup can continue.
+ *
+ * To allow invalidating stale dentries if the directory's encryption key is
+ * added later, we also install a custom ->d_revalidate() method and use the
+ * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a
+ * plaintext name (flag set) or a ciphertext name (flag cleared).
+ *
+ * Return: 0 on success, -errno if a problem occurred while setting up the
+ * encryption key
+ */
+static inline int fscrypt_prepare_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ if (IS_ENCRYPTED(dir))
+ return __fscrypt_prepare_lookup(dir, dentry);
+ return 0;
+}
+
+/**
+ * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes
+ * @dentry: dentry through which the inode is being changed
+ * @attr: attributes to change
+ *
+ * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file,
+ * most attribute changes are allowed even without the encryption key. However,
+ * without the encryption key we do have to forbid truncates. This is needed
+ * because the size being truncated to may not be a multiple of the filesystem
+ * block size, and in that case we'd have to decrypt the final block, zero the
+ * portion past i_size, and re-encrypt it. (We *could* allow truncating to a
+ * filesystem block boundary, but it's simpler to just forbid all truncates ---
+ * and we already forbid all other contents modifications without the key.)
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ * if a problem occurred while setting up the encryption key.
+ */
+static inline int fscrypt_prepare_setattr(struct dentry *dentry,
+ struct iattr *attr)
+{
+ if (attr->ia_valid & ATTR_SIZE)
+ return fscrypt_require_key(d_inode(dentry));
+ return 0;
+}
+
+#endif /* _LINUX_FSCRYPT_H */
diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h
deleted file mode 100644
index 4022c61..0000000
--- a/include/linux/fscrypt_common.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * fscrypt_common.h: common declarations for per-file encryption
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * Written by Michael Halcrow, 2015.
- * Modified by Jaegeuk Kim, 2015.
- */
-
-#ifndef _LINUX_FSCRYPT_COMMON_H
-#define _LINUX_FSCRYPT_COMMON_H
-
-#include <linux/key.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/bio.h>
-#include <linux/dcache.h>
-#include <crypto/skcipher.h>
-#include <uapi/linux/fs.h>
-
-#define FS_CRYPTO_BLOCK_SIZE 16
-
-struct fscrypt_info;
-
-struct fscrypt_ctx {
- union {
- struct {
- struct page *bounce_page; /* Ciphertext page */
- struct page *control_page; /* Original page */
- } w;
- struct {
- struct bio *bio;
- struct work_struct work;
- } r;
- struct list_head free_list; /* Free list */
- };
- u8 flags; /* Flags */
-};
-
-/**
- * For encrypted symlinks, the ciphertext length is stored at the beginning
- * of the string in little-endian format.
- */
-struct fscrypt_symlink_data {
- __le16 len;
- char encrypted_path[1];
-} __packed;
-
-struct fscrypt_str {
- unsigned char *name;
- u32 len;
-};
-
-struct fscrypt_name {
- const struct qstr *usr_fname;
- struct fscrypt_str disk_name;
- u32 hash;
- u32 minor_hash;
- struct fscrypt_str crypto_buf;
-};
-
-#define FSTR_INIT(n, l) { .name = n, .len = l }
-#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
-#define fname_name(p) ((p)->disk_name.name)
-#define fname_len(p) ((p)->disk_name.len)
-
-/*
- * fscrypt superblock flags
- */
-#define FS_CFLG_OWN_PAGES (1U << 1)
-
-/*
- * crypto opertions for filesystems
- */
-struct fscrypt_operations {
- unsigned int flags;
- const char *key_prefix;
- int (*get_context)(struct inode *, void *, size_t);
- int (*set_context)(struct inode *, const void *, size_t, void *);
- int (*dummy_context)(struct inode *);
- bool (*is_encrypted)(struct inode *);
- bool (*empty_dir)(struct inode *);
- unsigned (*max_namelen)(struct inode *);
-};
-
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
-{
- if (inode->i_sb->s_cop->dummy_context &&
- inode->i_sb->s_cop->dummy_context(inode))
- return true;
- return false;
-}
-
-static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
- u32 filenames_mode)
-{
- if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
- filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
- return true;
-
- if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
- filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
- return true;
-
- return false;
-}
-
-static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
-{
- if (str->len == 1 && str->name[0] == '.')
- return true;
-
- if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
- return true;
-
- return false;
-}
-
-static inline struct page *fscrypt_control_page(struct page *page)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
- return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
-#else
- WARN_ON_ONCE(1);
- return ERR_PTR(-EINVAL);
-#endif
-}
-
-static inline int fscrypt_has_encryption_key(const struct inode *inode)
-{
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
- return (inode->i_crypt_info != NULL);
-#else
- return 0;
-#endif
-}
-
-#endif /* _LINUX_FSCRYPT_COMMON_H */
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index ec406ae..c4c6bf2 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -3,13 +3,16 @@
*
* This stubs out the fscrypt functions for filesystems configured without
* encryption support.
+ *
+ * Do not include this file directly. Use fscrypt.h instead!
*/
+#ifndef _LINUX_FSCRYPT_H
+#error "Incorrect include of linux/fscrypt_notsupp.h!"
+#endif
#ifndef _LINUX_FSCRYPT_NOTSUPP_H
#define _LINUX_FSCRYPT_NOTSUPP_H
-#include <linux/fscrypt_common.h>
-
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
gfp_t gfp_flags)
@@ -97,7 +100,7 @@
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
- if (dir->i_sb->s_cop->is_encrypted(dir))
+ if (IS_ENCRYPTED(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
@@ -174,4 +177,34 @@
return -EOPNOTSUPP;
}
+/* hooks.c */
+
+static inline int fscrypt_file_open(struct inode *inode, struct file *filp)
+{
+ if (IS_ENCRYPTED(inode))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static inline int __fscrypt_prepare_link(struct inode *inode,
+ struct inode *dir)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_lookup(struct inode *dir,
+ struct dentry *dentry)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* _LINUX_FSCRYPT_NOTSUPP_H */
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index 32e2fcf..2db5e970 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -1,14 +1,15 @@
/*
* fscrypt_supp.h
*
- * This is included by filesystems configured with encryption support.
+ * Do not include this file directly. Use fscrypt.h instead!
*/
+#ifndef _LINUX_FSCRYPT_H
+#error "Incorrect include of linux/fscrypt_supp.h!"
+#endif
#ifndef _LINUX_FSCRYPT_SUPP_H
#define _LINUX_FSCRYPT_SUPP_H
-#include <linux/fscrypt_common.h>
-
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
@@ -142,4 +143,14 @@
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
+/* hooks.c */
+extern int fscrypt_file_open(struct inode *inode, struct file *filp);
+extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir);
+extern int __fscrypt_prepare_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry,
+ unsigned int flags);
+extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry);
+
#endif /* _LINUX_FSCRYPT_SUPP_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index e5f03a4d..ff51592 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -213,12 +213,20 @@
static inline void fsnotify_open(struct file *file)
{
struct path *path = &file->f_path;
+ struct path lower_path;
struct inode *inode = path->dentry->d_inode;
+
__u32 mask = FS_OPEN;
if (S_ISDIR(inode->i_mode))
mask |= FS_ISDIR;
+ if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) {
+ path->dentry->d_op->d_canonical_path(path, &lower_path);
+ fsnotify_parent(&lower_path, NULL, mask);
+ fsnotify(lower_path.dentry->d_inode, mask, &lower_path, FSNOTIFY_EVENT_PATH, NULL, 0);
+ path_put(&lower_path);
+ }
fsnotify_parent(path, NULL, mask);
fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0);
}
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 29d4385..206fe3b 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -32,6 +32,7 @@
#include <linux/types.h>
#include <linux/spinlock_types.h>
+#include <linux/atomic.h>
struct device;
struct device_node;
@@ -70,7 +71,7 @@
*/
struct gen_pool_chunk {
struct list_head next_chunk; /* next chunk in pool */
- atomic_t avail;
+ atomic_long_t avail;
phys_addr_t phys_addr; /* physical starting address of memory chunk */
unsigned long start_addr; /* start address of memory chunk */
unsigned long end_addr; /* end address of memory chunk (inclusive) */
diff --git a/include/linux/init.h b/include/linux/init.h
index e30104c..8e346d1 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -4,6 +4,13 @@
#include <linux/compiler.h>
#include <linux/types.h>
+/* Built-in __init functions needn't be compiled with retpoline */
+#if defined(RETPOLINE) && !defined(MODULE)
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#else
+#define __noretpoline
+#endif
+
/* These macros are used to mark some functions or
* initialized data (doesn't apply to uninitialized data)
* as `initialization' functions. The kernel can take this
@@ -39,7 +46,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold notrace __latent_entropy
+#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 72f0721..999b7c3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,7 @@
#include <linux/atomic.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
+#include <asm/sections.h>
/*
* These correspond to the IORESOURCE_IRQ_* defines in
@@ -699,7 +700,6 @@
extern int arch_probe_nr_irqs(void);
extern int arch_early_irq_init(void);
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
/*
* We want to know which function is an entrypoint of a hardirq or a softirq.
*/
@@ -707,16 +707,4 @@
#define __softirq_entry \
__attribute__((__section__(".softirqentry.text")))
-/* Limits of hardirq entrypoints */
-extern char __irqentry_text_start[];
-extern char __irqentry_text_end[];
-/* Limits of softirq entrypoints */
-extern char __softirqentry_text_start[];
-extern char __softirqentry_text_end[];
-
-#else
-#define __irq_entry
-#define __softirq_entry
-#endif
-
#endif
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 3b94400..11ff751 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -248,7 +248,8 @@
* 100: prefer care-of address
*/
dontfrag:1,
- autoflowlabel:1;
+ autoflowlabel:1,
+ autoflowlabel_set:1;
__u8 min_hopcount;
__u8 tclass;
__be32 rcv_flowinfo;
diff --git a/include/linux/kaiser.h b/include/linux/kaiser.h
new file mode 100644
index 0000000..58c55b1
--- /dev/null
+++ b/include/linux/kaiser.h
@@ -0,0 +1,52 @@
+#ifndef _LINUX_KAISER_H
+#define _LINUX_KAISER_H
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#include <asm/kaiser.h>
+
+static inline int kaiser_map_thread_stack(void *stack)
+{
+ /*
+ * Map that page of kernel stack on which we enter from user context.
+ */
+ return kaiser_add_mapping((unsigned long)stack +
+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
+}
+
+static inline void kaiser_unmap_thread_stack(void *stack)
+{
+ /*
+ * Note: may be called even when kaiser_map_thread_stack() failed.
+ */
+ kaiser_remove_mapping((unsigned long)stack +
+ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
+}
+#else
+
+/*
+ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which
+ * includes architectures that support KAISER, but have it disabled.
+ */
+
+static inline void kaiser_init(void)
+{
+}
+static inline int kaiser_add_mapping(unsigned long addr,
+ unsigned long size, unsigned long flags)
+{
+ return 0;
+}
+static inline void kaiser_remove_mapping(unsigned long start,
+ unsigned long size)
+{
+}
+static inline int kaiser_map_thread_stack(void *stack)
+{
+ return 0;
+}
+static inline void kaiser_unmap_thread_stack(void *stack)
+{
+}
+
+#endif /* !CONFIG_PAGE_TABLE_ISOLATION */
+#endif /* _LINUX_KAISER_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 820c0ad..b37afd1 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -30,16 +30,10 @@
}
/* Enable reporting bugs after kasan_disable_current() */
-static inline void kasan_enable_current(void)
-{
- current->kasan_depth++;
-}
+extern void kasan_enable_current(void);
/* Disable reporting bugs for current task */
-static inline void kasan_disable_current(void)
-{
- current->kasan_depth--;
-}
+extern void kasan_disable_current(void);
void kasan_unpoison_shadow(const void *address, size_t size);
@@ -52,7 +46,7 @@
void kasan_cache_create(struct kmem_cache *cache, size_t *size,
unsigned long *flags);
void kasan_cache_shrink(struct kmem_cache *cache);
-void kasan_cache_destroy(struct kmem_cache *cache);
+void kasan_cache_shutdown(struct kmem_cache *cache);
void kasan_poison_slab(struct page *page);
void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -81,6 +75,9 @@
static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
size_t kasan_metadata_size(struct kmem_cache *cache);
+bool kasan_save_enable_multi_shot(void);
+void kasan_restore_multi_shot(bool enabled);
+
#else /* CONFIG_KASAN */
static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
@@ -98,7 +95,7 @@
size_t *size,
unsigned long *flags) {}
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
-static inline void kasan_cache_destroy(struct kmem_cache *cache) {}
+static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index 2883ac9..87e2a44 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -7,19 +7,23 @@
#ifdef CONFIG_KCOV
-void kcov_task_init(struct task_struct *t);
-void kcov_task_exit(struct task_struct *t);
-
enum kcov_mode {
/* Coverage collection is not enabled yet. */
KCOV_MODE_DISABLED = 0,
+ /* KCOV was initialized, but tracing mode hasn't been chosen yet. */
+ KCOV_MODE_INIT = 1,
/*
* Tracing coverage collection mode.
* Covered PCs are collected in a per-task buffer.
*/
- KCOV_MODE_TRACE = 1,
+ KCOV_MODE_TRACE_PC = 2,
+ /* Collecting comparison operands mode. */
+ KCOV_MODE_TRACE_CMP = 3,
};
+void kcov_task_init(struct task_struct *t);
+void kcov_task_exit(struct task_struct *t);
+
#else
static inline void kcov_task_init(struct task_struct *t) {}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 80faf44..dd1b009 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -476,6 +476,7 @@
enum {
MLX4_INTERFACE_STATE_UP = 1 << 0,
MLX4_INTERFACE_STATE_DELETION = 1 << 1,
+ MLX4_INTERFACE_STATE_NOWAIT = 1 << 2,
};
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6045d4d..20ee90c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -143,7 +143,7 @@
MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771,
MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772,
MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773,
- MLX5_CMD_OP_SET_RATE_LIMIT = 0x780,
+ MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780,
MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781,
MLX5_CMD_OP_ALLOC_PD = 0x800,
MLX5_CMD_OP_DEALLOC_PD = 0x801,
@@ -737,6 +737,12 @@
MLX5_CAP_PORT_TYPE_ETH = 0x1,
};
+enum {
+ MLX5_CAP_UMR_FENCE_STRONG = 0x0,
+ MLX5_CAP_UMR_FENCE_SMALL = 0x1,
+ MLX5_CAP_UMR_FENCE_NONE = 0x2,
+};
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x80];
@@ -838,7 +844,9 @@
u8 striding_rq[0x1];
u8 reserved_at_201[0x2];
u8 ipoib_basic_offloads[0x1];
- u8 reserved_at_205[0xa];
+ u8 reserved_at_205[0x5];
+ u8 umr_fence[0x2];
+ u8 reserved_at_20c[0x3];
u8 drain_sigerr[0x1];
u8 cmdif_checksum[0x2];
u8 sigerr_cqe[0x1];
@@ -6689,7 +6697,7 @@
u8 vxlan_udp_port[0x10];
};
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -6698,7 +6706,7 @@
u8 reserved_at_40[0x40];
};
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
u8 opcode[0x10];
u8 reserved_at_10[0x10];
@@ -6711,6 +6719,8 @@
u8 reserved_at_60[0x20];
u8 rate_limit[0x20];
+
+ u8 reserved_at_a0[0x160];
};
struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cae2445..67dffb8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -351,6 +351,7 @@
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
+ int (*split)(struct vm_area_struct * area, unsigned long addr);
int (*mremap)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 634c4c5..c540001 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -63,8 +63,9 @@
* ("bit1" and "bit2" must be single bits)
*/
#define _calc_vm_trans(x, bit1, bit2) \
+ ((!(bit1) || !(bit2)) ? 0 : \
((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
- : ((x) & (bit1)) / ((bit1) / (bit2)))
+ : ((x) & (bit1)) / ((bit1) / (bit2))))
/*
* Combine the mmap "prot" argument into "vm_flags" used internally.
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index fac3b5c..1808b78 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -409,7 +409,8 @@
#ifdef CONFIG_BLOCK
int latency_hist_enabled;
- struct io_latency_state io_lat_s;
+ struct io_latency_state io_lat_read;
+ struct io_latency_state io_lat_write;
#endif
unsigned long private[0] ____cacheline_aligned;
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 25c0dc3..854dfa6 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,18 +381,6 @@
___pmd; \
})
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \
-({ \
- unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \
- pmd_t ___pmd; \
- \
- ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \
- mmu_notifier_invalidate_range(__mm, ___haddr, \
- ___haddr + HPAGE_PMD_SIZE); \
- \
- ___pmd; \
-})
-
/*
* set_pte_at_notify() sets the pte _after_ running the notifier.
* This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -480,7 +468,6 @@
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fff21a8..e3d7754 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -124,8 +124,9 @@
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
- /* Second 128 byte cacheline */
+ NR_KAISERTABLE,
NR_BOUNCE,
+ /* Second 128 byte cacheline */
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
@@ -632,6 +633,8 @@
int kswapd_order;
enum zone_type kswapd_classzone_idx;
+ int kswapd_failures; /* Number of 'reclaimed == 0' runs */
+
#ifdef CONFIG_COMPACTION
int kcompactd_max_order;
enum zone_type kcompactd_classzone_idx;
diff --git a/include/linux/module.h b/include/linux/module.h
index 1226280..fd9e121 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -791,6 +791,15 @@
static inline void module_bug_cleanup(struct module *mod) {}
#endif /* CONFIG_GENERIC_BUG */
+#ifdef RETPOLINE
+extern bool retpoline_module_ok(bool has_retpoline);
+#else
+static inline bool retpoline_module_ok(bool has_retpoline)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_MODULE_SIG
static inline bool module_sig_ok(struct module *module)
{
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3aa56e3..b5b43f9 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -270,75 +270,67 @@
#define INVALIDATE_CACHED_RANGE(map, from, size) \
do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0)
+#define map_word_equal(map, val1, val2) \
+({ \
+ int i, ret = 1; \
+ for (i = 0; i < map_words(map); i++) \
+ if ((val1).x[i] != (val2).x[i]) { \
+ ret = 0; \
+ break; \
+ } \
+ ret; \
+})
-static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2)
-{
- int i;
+#define map_word_and(map, val1, val2) \
+({ \
+ map_word r; \
+ int i; \
+ for (i = 0; i < map_words(map); i++) \
+ r.x[i] = (val1).x[i] & (val2).x[i]; \
+ r; \
+})
- for (i = 0; i < map_words(map); i++) {
- if (val1.x[i] != val2.x[i])
- return 0;
- }
+#define map_word_clr(map, val1, val2) \
+({ \
+ map_word r; \
+ int i; \
+ for (i = 0; i < map_words(map); i++) \
+ r.x[i] = (val1).x[i] & ~(val2).x[i]; \
+ r; \
+})
- return 1;
-}
+#define map_word_or(map, val1, val2) \
+({ \
+ map_word r; \
+ int i; \
+ for (i = 0; i < map_words(map); i++) \
+ r.x[i] = (val1).x[i] | (val2).x[i]; \
+ r; \
+})
-static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2)
-{
- map_word r;
- int i;
+#define map_word_andequal(map, val1, val2, val3) \
+({ \
+ int i, ret = 1; \
+ for (i = 0; i < map_words(map); i++) { \
+ if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \
+ ret = 0; \
+ break; \
+ } \
+ } \
+ ret; \
+})
- for (i = 0; i < map_words(map); i++)
- r.x[i] = val1.x[i] & val2.x[i];
-
- return r;
-}
-
-static inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2)
-{
- map_word r;
- int i;
-
- for (i = 0; i < map_words(map); i++)
- r.x[i] = val1.x[i] & ~val2.x[i];
-
- return r;
-}
-
-static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2)
-{
- map_word r;
- int i;
-
- for (i = 0; i < map_words(map); i++)
- r.x[i] = val1.x[i] | val2.x[i];
-
- return r;
-}
-
-static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3)
-{
- int i;
-
- for (i = 0; i < map_words(map); i++) {
- if ((val1.x[i] & val2.x[i]) != val3.x[i])
- return 0;
- }
-
- return 1;
-}
-
-static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2)
-{
- int i;
-
- for (i = 0; i < map_words(map); i++) {
- if (val1.x[i] & val2.x[i])
- return 1;
- }
-
- return 0;
-}
+#define map_word_bitsset(map, val1, val2) \
+({ \
+ int i, ret = 0; \
+ for (i = 0; i < map_words(map); i++) { \
+ if ((val1).x[i] & (val2).x[i]) { \
+ ret = 1; \
+ break; \
+ } \
+ } \
+ ret; \
+})
static inline map_word map_word_load(struct map_info *map, const void *ptr)
{
diff --git a/include/linux/nospec.h b/include/linux/nospec.h
new file mode 100644
index 0000000..b99bced
--- /dev/null
+++ b/include/linux/nospec.h
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Linus Torvalds. All rights reserved.
+// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#ifndef _LINUX_NOSPEC_H
+#define _LINUX_NOSPEC_H
+
+/**
+ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ *
+ * When @index is out of bounds (@index >= @size), the sign bit will be
+ * set. Extend the sign bit to all bits and invert, giving a result of
+ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
+ */
+#ifndef array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ /*
+ * Warn developers about inappropriate array_index_nospec() usage.
+ *
+ * Even if the CPU speculates past the WARN_ONCE branch, the
+ * sign bit of @index is taken into account when generating the
+ * mask.
+ *
+ * This warning is compiled out when the compiler can infer that
+ * @index and @size are less than LONG_MAX.
+ */
+ if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
+ "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
+ return 0;
+
+ /*
+ * Always calculate and emit the mask even if the compiler
+ * thinks the mask is not needed. The compiler does not take
+ * into account the value of @index under speculation.
+ */
+ OPTIMIZER_HIDE_VAR(index);
+ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
+}
+#endif
+
+/*
+ * array_index_nospec - sanitize an array index after a bounds check
+ *
+ * For a code sequence like:
+ *
+ * if (index < size) {
+ * index = array_index_nospec(index, size);
+ * val = array[index];
+ * }
+ *
+ * ...if the CPU speculates past the bounds check then
+ * array_index_nospec() will clamp the index within the range of [0,
+ * size).
+ */
+#define array_index_nospec(index, size) \
+({ \
+ typeof(index) _i = (index); \
+ typeof(size) _s = (size); \
+ unsigned long _mask = array_index_mask_nospec(_i, _s); \
+ \
+ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
+ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
+ \
+ _i &= _mask; \
+ _i; \
+})
+#endif /* _LINUX_NOSPEC_H */
diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h
index 35d0fd7..e821a31 100644
--- a/include/linux/omap-gpmc.h
+++ b/include/linux/omap-gpmc.h
@@ -88,10 +88,11 @@
#endif
#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2)
-extern void gpmc_onenand_init(struct omap_onenand_platform_data *d);
+extern int gpmc_onenand_init(struct omap_onenand_platform_data *d);
#else
#define board_onenand_data NULL
-static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d)
+static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d)
{
+ return 0;
}
#endif
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8f16299..8902f23 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -35,6 +35,12 @@
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#define USER_MAPPED_SECTION "..user_mapped"
+#else
+#define USER_MAPPED_SECTION ""
+#endif
+
/*
* Base implementations of per-CPU variable declarations and definitions, where
* the section in which the variable is to be placed is provided by the
@@ -115,6 +121,12 @@
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")
+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
/*
* Declaration/definition used for per-CPU variables that must come first in
* the set of variables.
@@ -144,6 +156,14 @@
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
#define DECLARE_PER_CPU_ALIGNED(type, name) \
DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
____cacheline_aligned
@@ -162,11 +182,21 @@
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
__aligned(PAGE_SIZE)
+/*
+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+ */
+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
+
+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
/*
* Declaration/definition used for per-CPU variables that must be read mostly.
*/
-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
+#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 531b8b1..61ab566 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1264,6 +1264,7 @@
extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a04d69a..867110c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -684,6 +684,17 @@
}
/**
+ * phy_interface_mode_is_rgmii - Convenience function for testing if a
+ * PHY interface mode is RGMII (all variants)
+ * @mode: the phy_interface_t enum
+ */
+static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
+{
+ return mode >= PHY_INTERFACE_MODE_RGMII &&
+ mode <= PHY_INTERFACE_MODE_RGMII_TXID;
+};
+
+/**
* phy_interface_is_rgmii - Convenience function for testing if a PHY interface
* is RGMII (all variants)
* @phydev: the phy_device struct
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index b83507c..e38f471 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -99,12 +99,18 @@
/* Note: callers invoking this in a loop must use a compiler barrier,
* for example cpu_relax(). Callers must hold producer_lock.
+ * Callers are responsible for making sure pointer that is being queued
+ * points to a valid data.
*/
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
if (unlikely(!r->size) || r->queue[r->producer])
return -ENOSPC;
+ /* Make sure the pointer we are storing points to a valid data. */
+ /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+ smp_wmb();
+
r->queue[r->producer++] = ptr;
if (unlikely(r->producer >= r->size))
r->producer = 0;
@@ -244,6 +250,9 @@
if (ptr)
__ptr_ring_discard_one(r);
+ /* Make sure anyone accessing data through the pointer is up to date. */
+ /* Pairs with smp_wmb in __ptr_ring_produce. */
+ smp_read_barrier_depends();
return ptr;
}
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 4ae95f7..6224a0a 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -100,44 +100,6 @@
}
/**
- * hlist_nulls_add_tail_rcu
- * @n: the element to add to the hash list.
- * @h: the list to add to.
- *
- * Description:
- * Adds the specified element to the end of the specified hlist_nulls,
- * while permitting racing traversals. NOTE: tail insertion requires
- * list traversal.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
- * or hlist_nulls_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs. Regardless of the type of CPU, the
- * list-traversal primitive must be guarded by rcu_read_lock().
- */
-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
- struct hlist_nulls_head *h)
-{
- struct hlist_nulls_node *i, *last = NULL;
-
- for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
- i = hlist_nulls_next_rcu(i))
- last = i;
-
- if (last) {
- n->next = last->next;
- n->pprev = &last->next;
- rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
- } else {
- hlist_nulls_add_head_rcu(n, h);
- }
-}
-
-/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
index f2e27e0..01b3778 100644
--- a/include/linux/sh_eth.h
+++ b/include/linux/sh_eth.h
@@ -16,7 +16,6 @@
unsigned char mac_addr[ETH_ALEN];
unsigned no_ether_link:1;
unsigned ether_link_active_low:1;
- unsigned needs_init:1;
};
#endif
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index c6f0f0d..00a1f33 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -116,6 +116,12 @@
.show = _name##_show, \
}
+#define __ATTR_RO_MODE(_name, _mode) { \
+ .attr = { .name = __stringify(_name), \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
+ .show = _name##_show, \
+}
+
#define __ATTR_WO(_name) { \
.attr = { .name = __stringify(_name), .mode = S_IWUSR }, \
.store = _name##_store, \
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e3fe774..fc11641 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -220,7 +220,8 @@
u16 advmss; /* Advertised MSS */
u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
- unused:6;
+ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
+ unused:5;
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
thin_dupack : 1,/* Fast retransmit on first dupack */
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
new file mode 100644
index 0000000..a2b3dfc
--- /dev/null
+++ b/include/linux/tee_drv.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __TEE_DRV_H
+#define __TEE_DRV_H
+
+#include <linux/types.h>
+#include <linux/idr.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/tee.h>
+
+/*
+ * The file describes the API provided by the generic TEE driver to the
+ * specific TEE driver.
+ */
+
+#define TEE_SHM_MAPPED BIT(0) /* Memory mapped by the kernel */
+#define TEE_SHM_DMA_BUF BIT(1) /* Memory with dma-buf handle */
+#define TEE_SHM_EXT_DMA_BUF BIT(2) /* Memory with dma-buf handle */
+#define TEE_SHM_REGISTER BIT(3) /* Memory registered in secure world */
+#define TEE_SHM_USER_MAPPED BIT(4) /* Memory mapped in user space */
+#define TEE_SHM_POOL BIT(5) /* Memory allocated from pool */
+
+struct device;
+struct tee_device;
+struct tee_shm;
+struct tee_shm_pool;
+
+/**
+ * struct tee_context - driver specific context on file pointer data
+ * @teedev: pointer to this drivers struct tee_device
+ * @list_shm: List of shared memory object owned by this context
+ * @data: driver specific context data, managed by the driver
+ * @refcount: reference counter for this structure
+ * @releasing: flag that indicates if context is being released right now.
+ * It is needed to break circular dependency on context during
+ * shared memory release.
+ */
+struct tee_context {
+ struct tee_device *teedev;
+ struct list_head list_shm;
+ void *data;
+ struct kref refcount;
+ bool releasing;
+};
+
+struct tee_param_memref {
+ size_t shm_offs;
+ size_t size;
+ struct tee_shm *shm;
+};
+
+struct tee_param_value {
+ u64 a;
+ u64 b;
+ u64 c;
+};
+
+struct tee_param {
+ u64 attr;
+ union {
+ struct tee_param_memref memref;
+ struct tee_param_value value;
+ } u;
+};
+
+/**
+ * struct tee_driver_ops - driver operations vtable
+ * @get_version: returns version of driver
+ * @open: called when the device file is opened
+ * @release: release this open file
+ * @open_session: open a new session
+ * @close_session: close a session
+ * @invoke_func: invoke a trusted function
+ * @cancel_req: request cancel of an ongoing invoke or open
+ * @supp_revc: called for supplicant to get a command
+ * @supp_send: called for supplicant to send a response
+ * @shm_register: register shared memory buffer in TEE
+ * @shm_unregister: unregister shared memory buffer in TEE
+ */
+struct tee_driver_ops {
+ void (*get_version)(struct tee_device *teedev,
+ struct tee_ioctl_version_data *vers);
+ int (*open)(struct tee_context *ctx);
+ void (*release)(struct tee_context *ctx);
+ int (*open_session)(struct tee_context *ctx,
+ struct tee_ioctl_open_session_arg *arg,
+ struct tee_param *param);
+ int (*close_session)(struct tee_context *ctx, u32 session);
+ int (*invoke_func)(struct tee_context *ctx,
+ struct tee_ioctl_invoke_arg *arg,
+ struct tee_param *param);
+ int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session);
+ int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params,
+ struct tee_param *param);
+ int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params,
+ struct tee_param *param);
+ int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm,
+ struct page **pages, size_t num_pages,
+ unsigned long start);
+ int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm);
+};
+
+/**
+ * struct tee_desc - Describes the TEE driver to the subsystem
+ * @name: name of driver
+ * @ops: driver operations vtable
+ * @owner: module providing the driver
+ * @flags: Extra properties of driver, defined by TEE_DESC_* below
+ */
+#define TEE_DESC_PRIVILEGED 0x1
+struct tee_desc {
+ const char *name;
+ const struct tee_driver_ops *ops;
+ struct module *owner;
+ u32 flags;
+};
+
+/**
+ * tee_device_alloc() - Allocate a new struct tee_device instance
+ * @teedesc: Descriptor for this driver
+ * @dev: Parent device for this device
+ * @pool: Shared memory pool, NULL if not used
+ * @driver_data: Private driver data for this device
+ *
+ * Allocates a new struct tee_device instance. The device is
+ * removed by tee_device_unregister().
+ *
+ * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure
+ */
+struct tee_device *tee_device_alloc(const struct tee_desc *teedesc,
+ struct device *dev,
+ struct tee_shm_pool *pool,
+ void *driver_data);
+
+/**
+ * tee_device_register() - Registers a TEE device
+ * @teedev: Device to register
+ *
+ * tee_device_unregister() need to be called to remove the @teedev if
+ * this function fails.
+ *
+ * @returns < 0 on failure
+ */
+int tee_device_register(struct tee_device *teedev);
+
+/**
+ * tee_device_unregister() - Removes a TEE device
+ * @teedev: Device to unregister
+ *
+ * This function should be called to remove the @teedev even if
+ * tee_device_register() hasn't been called yet. Does nothing if
+ * @teedev is NULL.
+ */
+void tee_device_unregister(struct tee_device *teedev);
+
+/**
+ * struct tee_shm - shared memory object
+ * @teedev: device used to allocate the object
+ * @ctx: context using the object, if NULL the context is gone
+ * @link link element
+ * @paddr: physical address of the shared memory
+ * @kaddr: virtual address of the shared memory
+ * @size: size of shared memory
+ * @offset: offset of buffer in user space
+ * @pages: locked pages from userspace
+ * @num_pages: number of locked pages
+ * @dmabuf: dmabuf used to for exporting to user space
+ * @flags: defined by TEE_SHM_* in tee_drv.h
+ * @id: unique id of a shared memory object on this device
+ *
+ * This pool is only supposed to be accessed directly from the TEE
+ * subsystem and from drivers that implements their own shm pool manager.
+ */
+struct tee_shm {
+ struct tee_device *teedev;
+ struct tee_context *ctx;
+ struct list_head link;
+ phys_addr_t paddr;
+ void *kaddr;
+ size_t size;
+ unsigned int offset;
+ struct page **pages;
+ size_t num_pages;
+ struct dma_buf *dmabuf;
+ u32 flags;
+ int id;
+};
+
+/**
+ * struct tee_shm_pool_mgr - shared memory manager
+ * @ops: operations
+ * @private_data: private data for the shared memory manager
+ */
+struct tee_shm_pool_mgr {
+ const struct tee_shm_pool_mgr_ops *ops;
+ void *private_data;
+};
+
+/**
+ * struct tee_shm_pool_mgr_ops - shared memory pool manager operations
+ * @alloc: called when allocating shared memory
+ * @free: called when freeing shared memory
+ * @destroy_poolmgr: called when destroying the pool manager
+ */
+struct tee_shm_pool_mgr_ops {
+ int (*alloc)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm,
+ size_t size);
+ void (*free)(struct tee_shm_pool_mgr *poolmgr, struct tee_shm *shm);
+ void (*destroy_poolmgr)(struct tee_shm_pool_mgr *poolmgr);
+};
+
+/**
+ * tee_shm_pool_alloc() - Create a shared memory pool from shm managers
+ * @priv_mgr: manager for driver private shared memory allocations
+ * @dmabuf_mgr: manager for dma-buf shared memory allocations
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf, others will use the range provided by @priv.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *tee_shm_pool_alloc(struct tee_shm_pool_mgr *priv_mgr,
+ struct tee_shm_pool_mgr *dmabuf_mgr);
+
+/*
+ * tee_shm_pool_mgr_alloc_res_mem() - Create a shm manager for reserved
+ * memory
+ * @vaddr: Virtual address of start of pool
+ * @paddr: Physical address of start of pool
+ * @size: Size in bytes of the pool
+ *
+ * @returns pointer to a 'struct tee_shm_pool_mgr' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool_mgr *tee_shm_pool_mgr_alloc_res_mem(unsigned long vaddr,
+ phys_addr_t paddr,
+ size_t size,
+ int min_alloc_order);
+
+/**
+ * tee_shm_pool_mgr_destroy() - Free a shared memory manager
+ */
+static inline void tee_shm_pool_mgr_destroy(struct tee_shm_pool_mgr *poolm)
+{
+ poolm->ops->destroy_poolmgr(poolm);
+}
+
+/**
+ * struct tee_shm_pool_mem_info - holds information needed to create a shared
+ * memory pool
+ * @vaddr: Virtual address of start of pool
+ * @paddr: Physical address of start of pool
+ * @size: Size in bytes of the pool
+ */
+struct tee_shm_pool_mem_info {
+ unsigned long vaddr;
+ phys_addr_t paddr;
+ size_t size;
+};
+
+/**
+ * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved
+ * memory range
+ * @priv_info: Information for driver private shared memory pool
+ * @dmabuf_info: Information for dma-buf shared memory pool
+ *
+ * Start and end of pools will must be page aligned.
+ *
+ * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied
+ * in @dmabuf, others will use the range provided by @priv.
+ *
+ * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure.
+ */
+struct tee_shm_pool *
+tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info,
+ struct tee_shm_pool_mem_info *dmabuf_info);
+
+/**
+ * tee_shm_pool_free() - Free a shared memory pool
+ * @pool: The shared memory pool to free
+ *
+ * The must be no remaining shared memory allocated from this pool when
+ * this function is called.
+ */
+void tee_shm_pool_free(struct tee_shm_pool *pool);
+
+/**
+ * tee_get_drvdata() - Return driver_data pointer
+ * @returns the driver_data pointer supplied to tee_register().
+ */
+void *tee_get_drvdata(struct tee_device *teedev);
+
+/**
+ * tee_shm_alloc() - Allocate shared memory
+ * @ctx: Context that allocates the shared memory
+ * @size: Requested size of shared memory
+ * @flags: Flags setting properties for the requested shared memory.
+ *
+ * Memory allocated as global shared memory is automatically freed when the
+ * TEE file pointer is closed. The @flags field uses the bits defined by
+ * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If
+ * TEE_SHM_DMA_BUF global shared memory will be allocated and associated
+ * with a dma-buf handle, else driver private memory.
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags);
+
+/**
+ * tee_shm_priv_alloc() - Allocate shared memory privately
+ * @dev: Device that allocates the shared memory
+ * @size: Requested size of shared memory
+ *
+ * Allocates shared memory buffer that is not associated with any client
+ * context. Such buffers are owned by TEE driver and used for internal calls.
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_priv_alloc(struct tee_device *teedev, size_t size);
+
+/**
+ * tee_shm_register() - Register shared memory buffer
+ * @ctx: Context that registers the shared memory
+ * @addr: Address is userspace of the shared buffer
+ * @length: Length of the shared buffer
+ * @flags: Flags setting properties for the requested shared memory.
+ *
+ * @returns a pointer to 'struct tee_shm'
+ */
+struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr,
+ size_t length, u32 flags);
+
+/**
+ * tee_shm_is_registered() - Check if shared memory object in registered in TEE
+ * @shm: Shared memory handle
+ * @returns true if object is registered in TEE
+ */
+static inline bool tee_shm_is_registered(struct tee_shm *shm)
+{
+ return shm && (shm->flags & TEE_SHM_REGISTER);
+}
+
+/**
+ * tee_shm_free() - Free shared memory
+ * @shm: Handle to shared memory to free
+ */
+void tee_shm_free(struct tee_shm *shm);
+
+/**
+ * tee_shm_put() - Decrease reference count on a shared memory handle
+ * @shm: Shared memory handle
+ */
+void tee_shm_put(struct tee_shm *shm);
+
+/**
+ * tee_shm_va2pa() - Get physical address of a virtual address
+ * @shm: Shared memory handle
+ * @va: Virtual address to tranlsate
+ * @pa: Returned physical address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa);
+
+/**
+ * tee_shm_pa2va() - Get virtual address of a physical address
+ * @shm: Shared memory handle
+ * @pa: Physical address to tranlsate
+ * @va: Returned virtual address
+ * @returns 0 on success and < 0 on failure
+ */
+int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va);
+
+/**
+ * tee_shm_get_va() - Get virtual address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @returns virtual address of the shared memory + offs if offs is within
+ * the bounds of this shared memory, else an ERR_PTR
+ */
+void *tee_shm_get_va(struct tee_shm *shm, size_t offs);
+
+/**
+ * tee_shm_get_pa() - Get physical address of a shared memory plus an offset
+ * @shm: Shared memory handle
+ * @offs: Offset from start of this shared memory
+ * @pa: Physical address to return
+ * @returns 0 if offs is within the bounds of this shared memory, else an
+ * error code.
+ */
+int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa);
+
+/**
+ * tee_shm_get_size() - Get size of shared memory buffer
+ * @shm: Shared memory handle
+ * @returns size of shared memory
+ */
+static inline size_t tee_shm_get_size(struct tee_shm *shm)
+{
+ return shm->size;
+}
+
+/**
+ * tee_shm_get_pages() - Get list of pages that hold shared buffer
+ * @shm: Shared memory handle
+ * @num_pages: Number of pages will be stored there
+ * @returns pointer to pages array
+ */
+static inline struct page **tee_shm_get_pages(struct tee_shm *shm,
+ size_t *num_pages)
+{
+ *num_pages = shm->num_pages;
+ return shm->pages;
+}
+
+/**
+ * tee_shm_get_page_offset() - Get shared buffer offset from page start
+ * @shm: Shared memory handle
+ * @returns page offset of shared buffer
+ */
+static inline size_t tee_shm_get_page_offset(struct tee_shm *shm)
+{
+ return shm->offset;
+}
+
+/**
+ * tee_shm_get_id() - Get id of a shared memory object
+ * @shm: Shared memory handle
+ * @returns id
+ */
+static inline int tee_shm_get_id(struct tee_shm *shm)
+{
+ return shm->id;
+}
+
+/**
+ * tee_shm_get_from_id() - Find shared memory object and increase reference
+ * count
+ * @ctx: Context owning the shared memory
+ * @id: Id of shared memory object
+ * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure
+ */
+struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id);
+
+static inline bool tee_param_is_memref(struct tee_param *param)
+{
+ switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT:
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+#endif /*__TEE_DRV_H*/
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 51d601f..ec86e4e 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -274,9 +274,11 @@
unsigned long round_jiffies_up_relative(unsigned long j);
#ifdef CONFIG_HOTPLUG_CPU
+int timers_prepare_cpu(unsigned int cpu);
int timers_dead_cpu(unsigned int cpu);
#else
-#define timers_dead_cpu NULL
+#define timers_prepare_cpu NULL
+#define timers_dead_cpu NULL
#endif
#endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 40144f3..a41244f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -394,6 +394,8 @@
/* tty_io.c */
extern int __init tty_init(void);
extern const char *tty_name(const struct tty_struct *tty);
+extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout);
+extern void tty_ldisc_unlock(struct tty_struct *tty);
#else
static inline void console_init(void)
{ }
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 6e0ce8c..fde7550 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -79,6 +79,7 @@
# define EVENT_RX_KILL 10
# define EVENT_LINK_CHANGE 11
# define EVENT_SET_RX_MODE 12
+# define EVENT_NO_IP_ALIGN 13
};
static inline struct usb_driver *driver_of(struct usb_interface *intf)
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 9638bfe..584f9a6 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -48,6 +48,8 @@
struct virtio_vsock_hdr hdr;
struct work_struct work;
struct list_head list;
+ /* socket refcnt not held, only use for cancellation */
+ struct vsock_sock *vsk;
void *buf;
u32 len;
u32 off;
@@ -56,6 +58,7 @@
struct virtio_vsock_pkt_info {
u32 remote_cid, remote_port;
+ struct vsock_sock *vsk;
struct msghdr *msg;
u32 pkt_len;
u16 type;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 4d6ec58..2edb150 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -89,10 +89,8 @@
#endif
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
-#endif /* CONFIG_SMP */
NR_TLB_LOCAL_FLUSH_ALL,
NR_TLB_LOCAL_FLUSH_ONE,
#endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f275896..f32ed9a 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -100,6 +100,9 @@
void (*destruct)(struct vsock_sock *);
void (*release)(struct vsock_sock *);
+ /* Cancel all pending packets sent on vsock. */
+ int (*cancel_pkt)(struct vsock_sock *vsk);
+
/* Connections. */
int (*connect)(struct vsock_sock *);
diff --git a/include/net/arp.h b/include/net/arp.h
index 5e0f891..1b3f869 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -19,6 +19,9 @@
static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ key = INADDR_ANY;
+
return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
}
diff --git a/include/net/ip.h b/include/net/ip.h
index 3591ddc..573fd7f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -33,6 +33,8 @@
#include <net/flow.h>
#include <net/flow_dissector.h>
+#define IPV4_MIN_MTU 68 /* RFC 791 */
+
struct sock;
struct inet_skb_parm {
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 615ce0a..e64210c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -290,6 +290,7 @@
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2c7d876..8fd61bc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1007,7 +1007,7 @@
* @RX_FLAG_DECRYPTED: This frame was decrypted in hardware.
* @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame,
* verification has been done by the hardware.
- * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame.
+ * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame.
* If this flag is set, the stack cannot do any replay detection
* hence the driver or hardware will have to do that.
* @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this
@@ -1078,6 +1078,8 @@
* @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before.
* This is used for AMSDU subframes which can have the same PN as
* the first subframe.
+ * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
+ * be done in the hardware.
*/
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
@@ -1113,6 +1115,7 @@
RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31),
RX_FLAG_MIC_STRIPPED = BIT_ULL(32),
RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33),
+ RX_FLAG_ICV_STRIPPED = BIT_ULL(34),
};
#define RX_FLAG_STBC_SHIFT 26
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0940598..23102da 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -213,6 +213,11 @@
return net1 == net2;
}
+static inline int check_net(const struct net *net)
+{
+ return atomic_read(&net->count) != 0;
+}
+
void net_drop_ns(void *);
#else
@@ -237,6 +242,11 @@
return 1;
}
+static inline int check_net(const struct net *net)
+{
+ return 1;
+}
+
#define net_drop_ns NULL
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 97f8ed2..badd144 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -649,11 +649,7 @@
static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
- if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
- sk->sk_family == AF_INET6)
- hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
- else
- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaf8144..d082f11 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1002,7 +1002,7 @@
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs);
+ bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
/* These functions determine how the current flow behaves in respect of SACK
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 1beab55..818a38f 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -243,10 +243,11 @@
static inline enum ib_mtu iboe_get_mtu(int mtu)
{
/*
- * reduce IB headers from effective IBoE MTU. 28 stands for
- * atomic header which is the biggest possible header after BTH
+ * Reduce IB headers from effective IBoE MTU.
*/
- mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28;
+ mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES +
+ IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES +
+ IB_ICRC_BYTES);
if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096))
return IB_MTU_4096;
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b13419c..e02b78a 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -37,14 +37,17 @@
#include <uapi/linux/if_ether.h>
enum {
- IB_LRH_BYTES = 8,
- IB_ETH_BYTES = 14,
- IB_VLAN_BYTES = 4,
- IB_GRH_BYTES = 40,
- IB_IP4_BYTES = 20,
- IB_UDP_BYTES = 8,
- IB_BTH_BYTES = 12,
- IB_DETH_BYTES = 8
+ IB_LRH_BYTES = 8,
+ IB_ETH_BYTES = 14,
+ IB_VLAN_BYTES = 4,
+ IB_GRH_BYTES = 40,
+ IB_IP4_BYTES = 20,
+ IB_UDP_BYTES = 8,
+ IB_BTH_BYTES = 12,
+ IB_DETH_BYTES = 8,
+ IB_EXT_ATOMICETH_BYTES = 28,
+ IB_EXT_XRC_BYTES = 4,
+ IB_ICRC_BYTES = 4
};
struct ib_field {
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index dae99d7..706a701 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -165,11 +165,11 @@
struct sata_device {
unsigned int class;
- struct smp_resp rps_resp; /* report_phy_sata_resp */
u8 port_no; /* port number, if this is a PM (Port) */
struct ata_port *ap;
struct ata_host ata_host;
+ struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */
u8 fis[ATA_RESP_FIS_SIZE];
};
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 3afec70..20bc71c 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -197,7 +197,6 @@
#define SG_DEFAULT_RETRIES 0
/* Defaults, commented if they differ from original sg driver */
-#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */
#define SG_DEF_FORCE_PACK_ID 0
#define SG_DEF_KEEP_ORPHAN 0
#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index a87e894..30f99ce 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -297,7 +297,7 @@
struct list_head tg_pt_gp_lun_list;
struct se_lun *tg_pt_gp_alua_lun;
struct se_node_acl *tg_pt_gp_alua_nacl;
- struct delayed_work tg_pt_gp_transition_work;
+ struct work_struct tg_pt_gp_transition_work;
struct completion *tg_pt_gp_transition_complete;
};
@@ -493,6 +493,7 @@
#define CMD_T_BUSY (1 << 9)
#define CMD_T_TAS (1 << 10)
#define CMD_T_FABRIC_STOP (1 << 11)
+#define CMD_T_PRE_EXECUTE (1 << 12)
spinlock_t t_state_lock;
struct kref cmd_kref;
struct completion t_transport_stop_comp;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 8ade3eb..90fce4d 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -208,7 +208,7 @@
{ KVM_TRACE_MMIO_WRITE, "write" }
TRACE_EVENT(kvm_mmio,
- TP_PROTO(int type, int len, u64 gpa, u64 val),
+ TP_PROTO(int type, int len, u64 gpa, void *val),
TP_ARGS(type, len, gpa, val),
TP_STRUCT__entry(
@@ -222,7 +222,10 @@
__entry->type = type;
__entry->len = len;
__entry->gpa = gpa;
- __entry->val = val;
+ __entry->val = 0;
+ if (val)
+ memcpy(&__entry->val, val,
+ min_t(u32, sizeof(__entry->val), len));
),
TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 22b6ad3..8562b1c 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -90,7 +90,7 @@
#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1)
-#define PTR(gen, offset, dev) \
+#define MAKE_PTR(gen, offset, dev) \
((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
/* Bkey utility code */
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 1c31549..bc96b14 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -26,6 +26,19 @@
#define EPOLL_CTL_DEL 2
#define EPOLL_CTL_MOD 3
+/* Epoll event masks */
+#define EPOLLIN 0x00000001
+#define EPOLLPRI 0x00000002
+#define EPOLLOUT 0x00000004
+#define EPOLLERR 0x00000008
+#define EPOLLHUP 0x00000010
+#define EPOLLRDNORM 0x00000040
+#define EPOLLRDBAND 0x00000080
+#define EPOLLWRNORM 0x00000100
+#define EPOLLWRBAND 0x00000200
+#define EPOLLMSG 0x00000400
+#define EPOLLRDHUP 0x00002000
+
/* Set exclusive wakeup mode for the target file descriptor */
#define EPOLLEXCLUSIVE (1 << 28)
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index 574e22e..33b826b 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -7,4 +7,28 @@
#define KCOV_ENABLE _IO('c', 100)
#define KCOV_DISABLE _IO('c', 101)
+enum {
+ /*
+ * Tracing coverage collection mode.
+ * Covered PCs are collected in a per-task buffer.
+ * In new KCOV version the mode is chosen by calling
+ * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument
+ * was supposed to be 0 in such a call. So, for reasons of backward
+ * compatibility, we have chosen the value KCOV_TRACE_PC to be 0.
+ */
+ KCOV_TRACE_PC = 0,
+ /* Collecting comparison operands mode. */
+ KCOV_TRACE_CMP = 1,
+};
+
+/*
+ * The format for the types of collected comparisons.
+ *
+ * Bit 0 shows whether one of the arguments is a compile-time constant.
+ * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes.
+ */
+#define KCOV_CMP_CONST (1 << 0)
+#define KCOV_CMP_SIZE(n) ((n) << 1)
+#define KCOV_CMP_MASK KCOV_CMP_SIZE(3)
+
#endif /* _LINUX_KCOV_IOCTLS_H */
diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h
new file mode 100644
index 0000000..4b9eb06
--- /dev/null
+++ b/include/uapi/linux/tee.h
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2015-2016, Linaro Limited
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TEE_H
+#define __TEE_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * This file describes the API provided by a TEE driver to user space.
+ *
+ * Each TEE driver defines a TEE specific protocol which is used for the
+ * data passed back and forth using TEE_IOC_CMD.
+ */
+
+/* Helpers to make the ioctl defines */
+#define TEE_IOC_MAGIC 0xa4
+#define TEE_IOC_BASE 0
+
+/* Flags relating to shared memory */
+#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */
+#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */
+
+#define TEE_MAX_ARG_SIZE 1024
+
+#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */
+#define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */
+#define TEE_GEN_CAP_REG_MEM (1 << 2)/* Supports registering shared memory */
+
+/*
+ * TEE Implementation ID
+ */
+#define TEE_IMPL_ID_OPTEE 1
+
+/*
+ * OP-TEE specific capabilities
+ */
+#define TEE_OPTEE_CAP_TZ (1 << 0)
+
+/**
+ * struct tee_ioctl_version_data - TEE version
+ * @impl_id: [out] TEE implementation id
+ * @impl_caps: [out] Implementation specific capabilities
+ * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAPS_* above
+ *
+ * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above.
+ * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_*
+ * is valid when @impl_id == TEE_IMPL_ID_OPTEE.
+ */
+struct tee_ioctl_version_data {
+ __u32 impl_id;
+ __u32 impl_caps;
+ __u32 gen_caps;
+};
+
+/**
+ * TEE_IOC_VERSION - query version of TEE
+ *
+ * Takes a tee_ioctl_version_data struct and returns with the TEE version
+ * data filled in.
+ */
+#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \
+ struct tee_ioctl_version_data)
+
+/**
+ * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument
+ * @size: [in/out] Size of shared memory to allocate
+ * @flags: [in/out] Flags to/from allocation.
+ * @id: [out] Identifier of the shared memory
+ *
+ * The flags field should currently be zero as input. Updated by the call
+ * with actual flags as defined by TEE_IOCTL_SHM_* above.
+ * This structure is used as argument for TEE_IOC_SHM_ALLOC below.
+ */
+struct tee_ioctl_shm_alloc_data {
+ __u64 size;
+ __u32 flags;
+ __s32 id;
+};
+
+/**
+ * TEE_IOC_SHM_ALLOC - allocate shared memory
+ *
+ * Allocates shared memory between the user space process and secure OS.
+ *
+ * Returns a file descriptor on success or < 0 on failure
+ *
+ * The returned file descriptor is used to map the shared memory into user
+ * space. The shared memory is freed when the descriptor is closed and the
+ * memory is unmapped.
+ */
+#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \
+ struct tee_ioctl_shm_alloc_data)
+
+/**
+ * struct tee_ioctl_buf_data - Variable sized buffer
+ * @buf_ptr: [in] A __user pointer to a buffer
+ * @buf_len: [in] Length of the buffer above
+ *
+ * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE,
+ * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below.
+ */
+struct tee_ioctl_buf_data {
+ __u64 buf_ptr;
+ __u64 buf_len;
+};
+
+/*
+ * Attributes for struct tee_ioctl_param, selects field in the union
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */
+
+/*
+ * These defines value parameters (struct tee_ioctl_param_value)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2
+#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */
+
+/*
+ * These defines shared memory reference parameters (struct
+ * tee_ioctl_param_memref)
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */
+
+/*
+ * Mask for the type part of the attribute, leaves room for more types
+ */
+#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff
+
+/* Meta parameter carrying extra information about the message. */
+#define TEE_IOCTL_PARAM_ATTR_META 0x100
+
+/* Mask of all known attr bits */
+#define TEE_IOCTL_PARAM_ATTR_MASK \
+ (TEE_IOCTL_PARAM_ATTR_TYPE_MASK | TEE_IOCTL_PARAM_ATTR_META)
+
+/*
+ * Matches TEEC_LOGIN_* in GP TEE Client API
+ * Are only defined for GP compliant TEEs
+ */
+#define TEE_IOCTL_LOGIN_PUBLIC 0
+#define TEE_IOCTL_LOGIN_USER 1
+#define TEE_IOCTL_LOGIN_GROUP 2
+#define TEE_IOCTL_LOGIN_APPLICATION 4
+#define TEE_IOCTL_LOGIN_USER_APPLICATION 5
+#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6
+
+/**
+ * struct tee_ioctl_param - parameter
+ * @attr: attributes
+ * @a: if a memref, offset into the shared memory object, else a value parameter
+ * @b: if a memref, size of the buffer, else a value parameter
+ * @c: if a memref, shared memory identifier, else a value parameter
+ *
+ * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in
+ * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and
+ * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE
+ * indicates that none of the members are used.
+ *
+ * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an
+ * identifier representing the shared memory object. A memref can reference
+ * a part of a shared memory by specifying an offset (@a) and size (@b) of
+ * the object. To supply the entire shared memory object set the offset
+ * (@a) to 0 and size (@b) to the previously returned size of the object.
+ */
+struct tee_ioctl_param {
+ __u64 attr;
+ __u64 a;
+ __u64 b;
+ __u64 c;
+};
+
+#define TEE_IOCTL_UUID_LEN 16
+
+/**
+ * struct tee_ioctl_open_session_arg - Open session argument
+ * @uuid: [in] UUID of the Trusted Application
+ * @clnt_uuid: [in] UUID of client
+ * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @session: [out] Session id
+ * @ret: [out] return value
+ * @ret_origin [out] origin of the return value
+ * @num_params [in] number of parameters following this struct
+ */
+struct tee_ioctl_open_session_arg {
+ __u8 uuid[TEE_IOCTL_UUID_LEN];
+ __u8 clnt_uuid[TEE_IOCTL_UUID_LEN];
+ __u32 clnt_login;
+ __u32 cancel_id;
+ __u32 session;
+ __u32 ret;
+ __u32 ret_origin;
+ __u32 num_params;
+ /* num_params tells the actual number of element in params */
+ struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_ioctl_open_session_arg followed by any array of struct
+ * tee_ioctl_param
+ */
+#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted
+ * Application
+ * @func: [in] Trusted Application function, specific to the TA
+ * @session: [in] Session id
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @ret: [out] return value
+ * @ret_origin [out] origin of the return value
+ * @num_params [in] number of parameters following this struct
+ */
+struct tee_ioctl_invoke_arg {
+ __u32 func;
+ __u32 session;
+ __u32 cancel_id;
+ __u32 ret;
+ __u32 ret_origin;
+ __u32 num_params;
+ /* num_params tells the actual number of element in params */
+ struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_INVOKE - Invokes a function in a Trusted Application
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_invoke_func_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl
+ * @cancel_id: [in] Cancellation id, a unique value to identify this request
+ * @session: [in] Session id, if the session is opened, else set to 0
+ */
+struct tee_ioctl_cancel_arg {
+ __u32 cancel_id;
+ __u32 session;
+};
+
+/**
+ * TEE_IOC_CANCEL - Cancels an open session or invoke
+ */
+#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \
+ struct tee_ioctl_cancel_arg)
+
+/**
+ * struct tee_ioctl_close_session_arg - Closes an open session
+ * @session: [in] Session id
+ */
+struct tee_ioctl_close_session_arg {
+ __u32 session;
+};
+
+/**
+ * TEE_IOC_CLOSE_SESSION - Closes a session
+ */
+#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \
+ struct tee_ioctl_close_session_arg)
+
+/**
+ * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function
+ * @func: [in] supplicant function
+ * @num_params [in/out] number of parameters following this struct
+ *
+ * @num_params is the number of params that tee-supplicant has room to
+ * receive when input, @num_params is the number of actual params
+ * tee-supplicant receives when output.
+ */
+struct tee_iocl_supp_recv_arg {
+ __u32 func;
+ __u32 num_params;
+ /* num_params tells the actual number of element in params */
+ struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_recv_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_iocl_supp_send_arg - Send a response to a received request
+ * @ret: [out] return value
+ * @num_params [in] number of parameters following this struct
+ */
+struct tee_iocl_supp_send_arg {
+ __u32 ret;
+ __u32 num_params;
+ /* num_params tells the actual number of element in params */
+ struct tee_ioctl_param params[];
+};
+
+/**
+ * TEE_IOC_SUPPL_SEND - Receive a request for a supplicant function
+ *
+ * Takes a struct tee_ioctl_buf_data which contains a struct
+ * tee_iocl_supp_send_arg followed by any array of struct tee_param
+ */
+#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \
+ struct tee_ioctl_buf_data)
+
+/**
+ * struct tee_ioctl_shm_register_data - Shared memory register argument
+ * @addr: [in] Start address of shared memory to register
+ * @length: [in/out] Length of shared memory to register
+ * @flags: [in/out] Flags to/from registration.
+ * @id: [out] Identifier of the shared memory
+ *
+ * The flags field should currently be zero as input. Updated by the call
+ * with actual flags as defined by TEE_IOCTL_SHM_* above.
+ * This structure is used as argument for TEE_IOC_SHM_REGISTER below.
+ */
+struct tee_ioctl_shm_register_data {
+ __u64 addr;
+ __u64 length;
+ __u32 flags;
+ __s32 id;
+};
+
+/**
+ * TEE_IOC_SHM_REGISTER - Register shared memory argument
+ *
+ * Registers shared memory between the user space process and secure OS.
+ *
+ * Returns a file descriptor on success or < 0 on failure
+ *
+ * The shared memory is unregisterred when the descriptor is closed.
+ */
+#define TEE_IOC_SHM_REGISTER _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 9, \
+ struct tee_ioctl_shm_register_data)
+/*
+ * Five syscalls are used when communicating with the TEE driver.
+ * open(): opens the device associated with the driver
+ * ioctl(): as described above operating on the file descriptor from open()
+ * close(): two cases
+ * - closes the device file descriptor
+ * - closes a file descriptor connected to allocated shared memory
+ * mmap(): maps shared memory into user space using information from struct
+ * tee_ioctl_shm_alloc_data
+ * munmap(): unmaps previously shared memory
+ */
+
+#endif /*__TEE_H*/
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index 5e64a86..33c603d 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -423,6 +423,11 @@
#define USB_ENDPOINT_XFER_INT 3
#define USB_ENDPOINT_MAX_ADJUSTABLE 0x80
+#define USB_EP_MAXP_MULT_SHIFT 11
+#define USB_EP_MAXP_MULT_MASK (3 << USB_EP_MAXP_MULT_SHIFT)
+#define USB_EP_MAXP_MULT(m) \
+ (((m) & USB_EP_MAXP_MULT_MASK) >> USB_EP_MAXP_MULT_SHIFT)
+
/* The USB 3.0 spec redefines bits 5:4 of bmAttributes as interrupt ep type. */
#define USB_ENDPOINT_INTRTYPE 0x30
#define USB_ENDPOINT_INTR_PERIODIC (0 << 4)
@@ -630,6 +635,20 @@
return __le16_to_cpu(epd->wMaxPacketSize);
}
+/**
+ * usb_endpoint_maxp_mult - get endpoint's transactional opportunities
+ * @epd: endpoint to be checked
+ *
+ * Return @epd's wMaxPacketSize[12:11] + 1
+ */
+static inline int
+usb_endpoint_maxp_mult(const struct usb_endpoint_descriptor *epd)
+{
+ int maxp = __le16_to_cpu(epd->wMaxPacketSize);
+
+ return USB_EP_MAXP_MULT(maxp) + 1;
+}
+
static inline int usb_endpoint_interrupt_type(
const struct usb_endpoint_descriptor *epd)
{
@@ -854,6 +873,8 @@
__u8 bReserved;
} __attribute__((packed));
+#define USB_DT_USB_WIRELESS_CAP_SIZE 11
+
/* USB 2.0 Extension descriptor */
#define USB_CAP_TYPE_EXT 2
@@ -1046,6 +1067,7 @@
__u8 bDevCapabilityType;
} __attribute__((packed));
+#define USB_DT_USB_PTM_ID_SIZE 3
/*
* The size of the descriptor for the Sublink Speed Attribute Count
* (SSAC) specified in bmAttributes[4:0].
diff --git a/init/Kconfig b/init/Kconfig
index 3bb5824..544d791 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1748,6 +1748,13 @@
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
+config BPF_JIT_ALWAYS_ON
+ bool "Permanently enable BPF JIT and remove BPF interpreter"
+ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+ help
+ Enables BPF JIT and removes BPF interpreter to avoid
+ speculative execution of BPF instructions by the interpreter
+
config SHMEM
bool "Use full shmem filesystem" if EXPERT
default y
diff --git a/init/initramfs.c b/init/initramfs.c
index d0b53f49..bf3af10 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -622,8 +622,11 @@
{
char *err;
- if (do_skip_initramfs)
+ if (do_skip_initramfs) {
+ if (initrd_start)
+ free_initrd();
return default_rootfs();
+ }
err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
diff --git a/init/main.c b/init/main.c
index 25bac88..99f0265 100644
--- a/init/main.c
+++ b/init/main.c
@@ -80,6 +80,7 @@
#include <linux/integrity.h>
#include <linux/proc_ns.h>
#include <linux/io.h>
+#include <linux/kaiser.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -473,6 +474,7 @@
pgtable_init();
vmalloc_init();
ioremap_huge_init();
+ kaiser_init();
}
asmlinkage __visible void __init start_kernel(void)
diff --git a/ipc/msg.c b/ipc/msg.c
index e12307d..ff10d43 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -763,7 +763,10 @@
if (*msgtyp == 0)
return SEARCH_ANY;
if (*msgtyp < 0) {
- *msgtyp = -*msgtyp;
+ if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
+ *msgtyp = LONG_MAX;
+ else
+ *msgtyp = -*msgtyp;
return SEARCH_LESSEQUAL;
}
if (msgflg & MSG_EXCEPT)
diff --git a/kernel/acct.c b/kernel/acct.c
index 74963d1..37f1dc6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -99,7 +99,7 @@
{
struct kstatfs sbuf;
- if (time_is_before_jiffies(acct->needcheck))
+ if (time_is_after_jiffies(acct->needcheck))
goto out;
/* May block */
diff --git a/kernel/async.c b/kernel/async.c
index d2edd6e..d84d486 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -84,20 +84,24 @@
static async_cookie_t lowest_in_progress(struct async_domain *domain)
{
- struct list_head *pending;
+ struct async_entry *first = NULL;
async_cookie_t ret = ASYNC_COOKIE_MAX;
unsigned long flags;
spin_lock_irqsave(&async_lock, flags);
- if (domain)
- pending = &domain->pending;
- else
- pending = &async_global_pending;
+ if (domain) {
+ if (!list_empty(&domain->pending))
+ first = list_first_entry(&domain->pending,
+ struct async_entry, domain_list);
+ } else {
+ if (!list_empty(&async_global_pending))
+ first = list_first_entry(&async_global_pending,
+ struct async_entry, global_list);
+ }
- if (!list_empty(pending))
- ret = list_first_entry(pending, struct async_entry,
- domain_list)->cookie;
+ if (first)
+ ret = first->cookie;
spin_unlock_irqrestore(&async_lock, flags);
return ret;
diff --git a/kernel/audit.c b/kernel/audit.c
index f1ca116..da4e7c0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -79,13 +79,13 @@
#define AUDIT_OFF 0
#define AUDIT_ON 1
#define AUDIT_LOCKED 2
-u32 audit_enabled;
-u32 audit_ever_enabled;
+u32 audit_enabled = AUDIT_OFF;
+u32 audit_ever_enabled = !!AUDIT_OFF;
EXPORT_SYMBOL_GPL(audit_enabled);
/* Default state when kernel boots without any parameters. */
-static u32 audit_default;
+static u32 audit_default = AUDIT_OFF;
/* If auditing cannot proceed, audit_failure selects what happens. */
static u32 audit_failure = AUDIT_FAIL_PRINTK;
@@ -1199,8 +1199,6 @@
skb_queue_head_init(&audit_skb_queue);
skb_queue_head_init(&audit_skb_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
- audit_enabled = audit_default;
- audit_ever_enabled |= !!audit_default;
audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
@@ -1217,6 +1215,8 @@
audit_default = !!simple_strtol(str, NULL, 0);
if (!audit_default)
audit_initialized = AUDIT_DISABLED;
+ audit_enabled = audit_default;
+ audit_ever_enabled = !!audit_enabled;
pr_info("%s\n", audit_default ?
"enabled (after initialization)" : "disabled (until reboot)");
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index a1f8b54..2eb9b7f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,9 +49,10 @@
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+ u32 elem_size, index_mask, max_entries;
+ bool unpriv = !capable(CAP_SYS_ADMIN);
struct bpf_array *array;
- u64 array_size;
- u32 elem_size;
+ u64 array_size, mask64;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -67,11 +68,32 @@
elem_size = round_up(attr->value_size, 8);
+ max_entries = attr->max_entries;
+
+ /* On 32 bit archs roundup_pow_of_two() with max_entries that has
+ * upper most bit set in u32 space is undefined behavior due to
+ * resulting 1U << 32, so do it manually here in u64 space.
+ */
+ mask64 = fls_long(max_entries - 1);
+ mask64 = 1ULL << mask64;
+ mask64 -= 1;
+
+ index_mask = mask64;
+ if (unpriv) {
+ /* round up array size to nearest power of 2,
+ * since cpu will speculate within index_mask limits
+ */
+ max_entries = index_mask + 1;
+ /* Check for overflows. */
+ if (max_entries < attr->max_entries)
+ return ERR_PTR(-E2BIG);
+ }
+
array_size = sizeof(*array);
if (percpu)
- array_size += (u64) attr->max_entries * sizeof(void *);
+ array_size += (u64) max_entries * sizeof(void *);
else
- array_size += (u64) attr->max_entries * elem_size;
+ array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
@@ -81,6 +103,8 @@
array = bpf_map_area_alloc(array_size);
if (!array)
return ERR_PTR(-ENOMEM);
+ array->index_mask = index_mask;
+ array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
@@ -114,7 +138,7 @@
if (unlikely(index >= array->map.max_entries))
return NULL;
- return array->value + array->elem_size * index;
+ return array->value + array->elem_size * (index & array->index_mask);
}
/* Called from eBPF program */
@@ -126,7 +150,7 @@
if (unlikely(index >= array->map.max_entries))
return NULL;
- return this_cpu_ptr(array->pptrs[index]);
+ return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -146,7 +170,7 @@
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
- pptr = array->pptrs[index];
+ pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
@@ -194,10 +218,11 @@
return -EEXIST;
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
- memcpy(this_cpu_ptr(array->pptrs[index]),
+ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
value, map->value_size);
else
- memcpy(array->value + array->elem_size * index,
+ memcpy(array->value +
+ array->elem_size * (index & array->index_mask),
value, map->value_size);
return 0;
}
@@ -231,7 +256,7 @@
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
- pptr = array->pptrs[index];
+ pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
off += size;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index aa6d981..879ca84 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -458,6 +458,7 @@
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
* @ctx: is the data we are operating on
@@ -641,7 +642,7 @@
DST = tmp;
CONT;
ALU_MOD_X:
- if (unlikely(SRC == 0))
+ if (unlikely((u32)SRC == 0))
return 0;
tmp = (u32) DST;
DST = do_div(tmp, (u32) SRC);
@@ -660,7 +661,7 @@
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- if (unlikely(SRC == 0))
+ if (unlikely((u32)SRC == 0))
return 0;
tmp = (u32) DST;
do_div(tmp, (u32) SRC);
@@ -715,7 +716,7 @@
struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog;
- u64 index = BPF_R3;
+ u32 index = BPF_R3;
if (unlikely(index >= array->map.max_entries))
goto out;
@@ -923,6 +924,13 @@
}
STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
+#else
+static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
+{
+ return 0;
+}
+#endif
+
bool bpf_prog_array_compatible(struct bpf_array *array,
const struct bpf_prog *fp)
{
@@ -970,7 +978,11 @@
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
fp->bpf_func = (void *) __bpf_prog_run;
+#else
+ fp->bpf_func = (void *) __bpf_prog_ret0;
+#endif
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
@@ -979,6 +991,12 @@
* be JITed, but falls back to the interpreter.
*/
fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ if (!fp->jited) {
+ *err = -ENOTSUPP;
+ return fp;
+ }
+#endif
bpf_prog_lock_ro(fp);
/* The tail call compatibility check can only be done at
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 5c51d19..673fa6f 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -78,8 +78,10 @@
{
struct pcpu_freelist_head *head;
struct pcpu_freelist_node *node;
+ unsigned long flags;
int orig_cpu, cpu;
+ local_irq_save(flags);
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
@@ -87,14 +89,16 @@
node = head->first;
if (node) {
head->first = node->next;
- raw_spin_unlock(&head->lock);
+ raw_spin_unlock_irqrestore(&head->lock, flags);
return node;
}
raw_spin_unlock(&head->lock);
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
- if (cpu == orig_cpu)
+ if (cpu == orig_cpu) {
+ local_irq_restore(flags);
return NULL;
+ }
}
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adadb88..590ce0a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -636,57 +636,6 @@
list_add(&tl->list_node, &bpf_prog_types);
}
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- * insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
- const struct bpf_func_proto *fn;
- int i;
-
- for (i = 0; i < prog->len; i++) {
- struct bpf_insn *insn = &prog->insnsi[i];
-
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- /* we reach here when program has bpf_call instructions
- * and it passed bpf_check(), means that
- * ops->get_func_proto must have been supplied, check it
- */
- BUG_ON(!prog->aux->ops->get_func_proto);
-
- if (insn->imm == BPF_FUNC_get_route_realm)
- prog->dst_needed = 1;
- if (insn->imm == BPF_FUNC_get_prandom_u32)
- bpf_user_rnd_init_once();
- if (insn->imm == BPF_FUNC_tail_call) {
- /* mark bpf_tail_call as different opcode
- * to avoid conditional branch in
- * interpeter for every normal call
- * and to prevent accidental JITing by
- * JIT compiler that doesn't support
- * bpf_tail_call yet
- */
- insn->imm = 0;
- insn->code |= BPF_X;
- continue;
- }
-
- fn = prog->aux->ops->get_func_proto(insn->imm);
- /* all functions that have prototype and verifier allowed
- * programs to call them, must be real in-kernel functions
- */
- BUG_ON(!fn->func);
- insn->imm = fn->func - __bpf_call_base;
- }
- }
-}
-
/* drop refcnt on maps used by eBPF program and free auxilary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
@@ -894,9 +843,6 @@
if (err < 0)
goto free_used_maps;
- /* fixup BPF_CALL->imm field */
- fixup_bpf_calls(prog);
-
/* eBPF program is ready to be JITed */
prog = bpf_prog_select_runtime(prog, &err);
if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 372454a..076e4a0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -702,6 +702,13 @@
return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
}
+static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
+
+ return reg->type == PTR_TO_CTX;
+}
+
static int check_ptr_alignment(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, int off, int size)
{
@@ -896,6 +903,12 @@
return -EACCES;
}
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose("BPF_XADD stores into R%d context is not allowed\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
+
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1187,7 +1200,7 @@
}
}
-static int check_call(struct bpf_verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
@@ -1238,6 +1251,13 @@
err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
if (err)
return err;
+ if (func_id == BPF_FUNC_tail_call) {
+ if (meta.map_ptr == NULL) {
+ verbose("verifier bug\n");
+ return -EINVAL;
+ }
+ env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+ }
err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
if (err)
return err;
@@ -1790,10 +1810,17 @@
/* case: R = imm
* remember the value we stored into this reg
*/
+ u64 imm;
+
+ if (BPF_CLASS(insn->code) == BPF_ALU64)
+ imm = insn->imm;
+ else
+ imm = (u32)insn->imm;
+
regs[insn->dst_reg].type = CONST_IMM;
- regs[insn->dst_reg].imm = insn->imm;
- regs[insn->dst_reg].max_value = insn->imm;
- regs[insn->dst_reg].min_value = insn->imm;
+ regs[insn->dst_reg].imm = imm;
+ regs[insn->dst_reg].max_value = imm;
+ regs[insn->dst_reg].min_value = imm;
}
} else if (opcode > BPF_END) {
@@ -1829,6 +1856,11 @@
return -EINVAL;
}
+ if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
+ verbose("BPF_ARSH not supported for 32 bit ALU\n");
+ return -EINVAL;
+ }
+
if ((opcode == BPF_LSH || opcode == BPF_RSH ||
opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
@@ -1861,10 +1893,28 @@
((BPF_SRC(insn->code) == BPF_X &&
regs[insn->src_reg].type == CONST_IMM) ||
BPF_SRC(insn->code) == BPF_K)) {
- if (BPF_SRC(insn->code) == BPF_X)
+ if (BPF_SRC(insn->code) == BPF_X) {
+ /* check in case the register contains a big
+ * 64-bit value
+ */
+ if (regs[insn->src_reg].imm < -MAX_BPF_STACK ||
+ regs[insn->src_reg].imm > MAX_BPF_STACK) {
+ verbose("R%d value too big in R%d pointer arithmetic\n",
+ insn->src_reg, insn->dst_reg);
+ return -EACCES;
+ }
dst_reg->imm += regs[insn->src_reg].imm;
- else
+ } else {
+ /* safe against overflow: addition of 32-bit
+ * numbers in 64-bit representation
+ */
dst_reg->imm += insn->imm;
+ }
+ if (dst_reg->imm > 0 || dst_reg->imm < -MAX_BPF_STACK) {
+ verbose("R%d out-of-bounds pointer arithmetic\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
return 0;
} else if (opcode == BPF_ADD &&
BPF_CLASS(insn->code) == BPF_ALU64 &&
@@ -2697,11 +2747,12 @@
/* If we didn't map access then again we don't care about the
* mismatched range values and it's ok if our old type was
- * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
+ * UNKNOWN and we didn't go to a NOT_INIT'ed or pointer reg.
*/
if (rold->type == NOT_INIT ||
(!varlen_map_access && rold->type == UNKNOWN_VALUE &&
- rcur->type != NOT_INIT))
+ rcur->type != NOT_INIT &&
+ !__is_pointer_value(env->allow_ptr_leaks, rcur)))
continue;
/* Don't care about the reg->id in this case. */
@@ -2862,6 +2913,7 @@
if (err)
return err;
+ env->insn_aux_data[insn_idx].seen = true;
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
@@ -2973,6 +3025,12 @@
if (err)
return err;
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose("BPF_ST stores into R%d context is not allowed\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
+
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
@@ -2992,7 +3050,7 @@
return -EINVAL;
}
- err = check_call(env, insn->imm);
+ err = check_call(env, insn->imm, insn_idx);
if (err)
return err;
@@ -3059,6 +3117,7 @@
return err;
insn_idx++;
+ env->insn_aux_data[insn_idx].seen = true;
} else {
verbose("invalid BPF_LD mode\n");
return -EINVAL;
@@ -3210,6 +3269,63 @@
insn->src_reg = 0;
}
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+ u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+ int i;
+
+ if (cnt == 1)
+ return 0;
+ new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
+ if (!new_data)
+ return -ENOMEM;
+ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+ memcpy(new_data + off + cnt - 1, old_data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ for (i = off; i < off + cnt - 1; i++)
+ new_data[i].seen = true;
+ env->insn_aux_data = new_data;
+ vfree(old_data);
+ return 0;
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ struct bpf_prog *new_prog;
+
+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+ if (!new_prog)
+ return NULL;
+ if (adjust_insn_aux_data(env, new_prog->len, off, len))
+ return NULL;
+ return new_prog;
+}
+
+/* The verifier does more data flow analysis than llvm and will not explore
+ * branches that are dead at run time. Malicious programs can have dead code
+ * too. Therefore replace all dead at-run-time code with nops.
+ */
+static void sanitize_dead_code(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+ struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+ struct bpf_insn *insn = env->prog->insnsi;
+ const int insn_cnt = env->prog->len;
+ int i;
+
+ for (i = 0; i < insn_cnt; i++) {
+ if (aux_data[i].seen)
+ continue;
+ memcpy(insn + i, &nop, sizeof(nop));
+ }
+}
+
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
@@ -3229,10 +3345,10 @@
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
- new_prog = bpf_patch_insn_single(env->prog, 0,
- insn_buf, cnt);
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
+
env->prog = new_prog;
delta += cnt - 1;
}
@@ -3253,7 +3369,7 @@
else
continue;
- if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
+ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
@@ -3263,8 +3379,7 @@
return -EINVAL;
}
- new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
- cnt);
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
@@ -3278,6 +3393,99 @@
return 0;
}
+/* fixup insn->imm field of bpf_call instructions
+ *
+ * this function is called after eBPF program passed verification
+ */
+static int fixup_bpf_calls(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ const struct bpf_func_proto *fn;
+ const int insn_cnt = prog->len;
+ struct bpf_insn insn_buf[16];
+ struct bpf_prog *new_prog;
+ struct bpf_map *map_ptr;
+ int i, cnt, delta = 0;
+
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+ /* due to JIT bugs clear upper 32-bits of src register
+ * before div/mod operation
+ */
+ insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
+ insn_buf[1] = *insn;
+ cnt = 2;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
+
+ if (insn->imm == BPF_FUNC_get_route_realm)
+ prog->dst_needed = 1;
+ if (insn->imm == BPF_FUNC_get_prandom_u32)
+ bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_tail_call) {
+ /* mark bpf_tail_call as different opcode to avoid
+ * conditional branch in the interpeter for every normal
+ * call and to prevent accidental JITing by JIT compiler
+ * that doesn't support bpf_tail_call yet
+ */
+ insn->imm = 0;
+ insn->code |= BPF_X;
+
+ /* instead of changing every JIT dealing with tail_call
+ * emit two extra insns:
+ * if (index >= max_entries) goto out;
+ * index &= array->index_mask;
+ * to avoid out-of-bounds cpu speculation
+ */
+ map_ptr = env->insn_aux_data[i + delta].map_ptr;
+ if (!map_ptr->unpriv_array)
+ continue;
+ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+ map_ptr->max_entries, 2);
+ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+ container_of(map_ptr,
+ struct bpf_array,
+ map)->index_mask);
+ insn_buf[2] = *insn;
+ cnt = 3;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+ fn = prog->aux->ops->get_func_proto(insn->imm);
+ /* all functions that have prototype and verifier allowed
+ * programs to call them, must be real in-kernel functions
+ */
+ if (!fn->func) {
+ verbose("kernel subsystem misconfigured func %d\n",
+ insn->imm);
+ return -EFAULT;
+ }
+ insn->imm = fn->func - __bpf_call_base;
+ }
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
@@ -3373,9 +3581,15 @@
free_states(env);
if (ret == 0)
+ sanitize_dead_code(env);
+
+ if (ret == 0)
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
+ if (ret == 0)
+ ret = fixup_bpf_calls(env);
+
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ed48455..9c70769 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1316,9 +1316,9 @@
* before blk_mq_queue_reinit_notify() from notify_dead(),
* otherwise a RCU stall occurs.
*/
- [CPUHP_TIMERS_DEAD] = {
+ [CPUHP_TIMERS_PREPARE] = {
.name = "timers:dead",
- .startup.single = NULL,
+ .startup.single = timers_prepare_cpu,
.teardown.single = timers_dead_cpu,
},
/* Kicks the plugged cpu into life */
@@ -1328,11 +1328,6 @@
.teardown.single = NULL,
.cant_stop = true,
},
- [CPUHP_AP_SMPCFD_DYING] = {
- .name = "smpcfd:dying",
- .startup.single = NULL,
- .teardown.single = smpcfd_dying_cpu,
- },
/*
* Handled on controll processor until the plugged processor manages
* this itself.
@@ -1374,6 +1369,11 @@
.startup.single = NULL,
.teardown.single = rcutree_dying_cpu,
},
+ [CPUHP_AP_SMPCFD_DYING] = {
+ .name = "smpcfd:dying",
+ .startup.single = NULL,
+ .teardown.single = smpcfd_dying_cpu,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 0b89128..3990c1f 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -357,7 +357,7 @@
}
kdb_printf("\n");
for (i = 0; i < count; i++) {
- if (kallsyms_symbol_next(p_tmp, i) < 0)
+ if (WARN_ON(!kallsyms_symbol_next(p_tmp, i)))
break;
kdb_printf("%s ", p_tmp);
*(p_tmp + len) = '\0';
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 60a8da7..2cc5ed1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7093,25 +7093,12 @@
perf_output_end(&handle);
}
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
- int throttle, struct perf_sample_data *data,
- struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
{
- int events = atomic_read(&event->event_limit);
struct hw_perf_event *hwc = &event->hw;
- u64 seq;
int ret = 0;
-
- /*
- * Non-sampling counters might still use the PMI to fold short
- * hardware counters, ignore those.
- */
- if (unlikely(!is_sampling_event(event)))
- return 0;
+ u64 seq;
seq = __this_cpu_read(perf_throttled_seq);
if (seq != hwc->interrupts_seq) {
@@ -7139,6 +7126,34 @@
perf_adjust_period(event, delta, hwc->last_period, true);
}
+ return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+ return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+ int throttle, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ int events = atomic_read(&event->event_limit);
+ int ret = 0;
+
+ /*
+ * Non-sampling counters might still use the PMI to fold short
+ * hardware counters, ignore those.
+ */
+ if (unlikely(!is_sampling_event(event)))
+ return 0;
+
+ ret = __perf_event_account_interrupt(event, throttle);
+
/*
* XXX event_limit might not quite work as expected on inherited
* events
diff --git a/kernel/fork.c b/kernel/fork.c
index 9321b1a..70e10cb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
+#include <linux/kaiser.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
@@ -213,6 +214,7 @@
static inline void free_thread_stack(struct task_struct *tsk)
{
+ kaiser_unmap_thread_stack(tsk->stack);
#ifdef CONFIG_VMAP_STACK
if (task_stack_vm_area(tsk)) {
unsigned long flags;
@@ -495,6 +497,10 @@
* functions again.
*/
tsk->stack = stack;
+
+ err= kaiser_map_thread_stack(tsk->stack);
+ if (err)
+ goto free_stack;
#ifdef CONFIG_VMAP_STACK
tsk->stack_vm_area = stack_vm_area;
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 88bad86..bb2265a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1711,6 +1711,9 @@
struct futex_q *this, *next;
WAKE_Q(wake_q);
+ if (nr_wake < 0 || nr_requeue < 0)
+ return -EINVAL;
+
if (requeue_pi) {
/*
* Requeue PI only works on two distinct uaddrs. This
diff --git a/kernel/groups.c b/kernel/groups.c
index 2fcadd6..94bde52 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -77,7 +77,7 @@
}
/* a simple Shell sort */
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
{
int base, max, stride;
int gidsetsize = group_info->ngroups;
@@ -103,6 +103,7 @@
stride /= 3;
}
}
+EXPORT_SYMBOL(groups_sort);
/* a simple bsearch */
int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -134,7 +135,6 @@
void set_groups(struct cred *new, struct group_info *group_info)
{
put_group_info(new->group_info);
- groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
}
@@ -218,6 +218,7 @@
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index a9b8cf5..def4548 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -612,7 +612,7 @@
return 0;
}
-late_initcall(jump_label_test);
+early_initcall(jump_label_test);
#endif /* STATIC_KEYS_SELFTEST */
#endif /* HAVE_JUMP_LABEL */
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3cbb0c8..f4988f5 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -1,11 +1,16 @@
#define pr_fmt(fmt) "kcov: " fmt
#define DISABLE_BRANCH_PROFILING
+#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -14,6 +19,10 @@
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/kcov.h>
+#include <asm/setup.h>
+
+/* Number of 64-bit words written per one comparison: */
+#define KCOV_WORDS_PER_CMP 4
/*
* kcov descriptor (one per opened debugfs file).
@@ -21,7 +30,12 @@
* - initial state after open()
* - then there must be a single ioctl(KCOV_INIT_TRACE) call
* - then, mmap() call (several calls are allowed but not useful)
- * - then, repeated enable/disable for a task (only one task a time allowed)
+ * - then, ioctl(KCOV_ENABLE, arg), where arg is
+ * KCOV_TRACE_PC - to trace only the PCs
+ * or
+ * KCOV_TRACE_CMP - to trace only the comparison operands
+ * - then, ioctl(KCOV_DISABLE) to disable the task.
+ * Enabling/disabling ioctls can be repeated (only one task a time allowed).
*/
struct kcov {
/*
@@ -41,6 +55,36 @@
struct task_struct *t;
};
+static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
+{
+ enum kcov_mode mode;
+
+ /*
+ * We are interested in code coverage as a function of a syscall inputs,
+ * so we ignore code executed in interrupts.
+ */
+ if (!in_task())
+ return false;
+ mode = READ_ONCE(t->kcov_mode);
+ /*
+ * There is some code that runs in interrupts but for which
+ * in_interrupt() returns false (e.g. preempt_schedule_irq()).
+ * READ_ONCE()/barrier() effectively provides load-acquire wrt
+ * interrupts, there are paired barrier()/WRITE_ONCE() in
+ * kcov_ioctl_locked().
+ */
+ barrier();
+ return mode == needed_mode;
+}
+
+static unsigned long canonicalize_ip(unsigned long ip)
+{
+#ifdef CONFIG_RANDOMIZE_BASE
+ ip -= kaslr_offset();
+#endif
+ return ip;
+}
+
/*
* Entry point from instrumented code.
* This is called once per basic-block/edge.
@@ -48,46 +92,139 @@
void notrace __sanitizer_cov_trace_pc(void)
{
struct task_struct *t;
- enum kcov_mode mode;
+ unsigned long *area;
+ unsigned long ip = canonicalize_ip(_RET_IP_);
+ unsigned long pos;
t = current;
- /*
- * We are interested in code coverage as a function of a syscall inputs,
- * so we ignore code executed in interrupts.
- * The checks for whether we are in an interrupt are open-coded, because
- * 1. We can't use in_interrupt() here, since it also returns true
- * when we are inside local_bh_disable() section.
- * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
- * since that leads to slower generated code (three separate tests,
- * one for each of the flags).
- */
- if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
- | NMI_MASK)))
+ if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t))
return;
- mode = READ_ONCE(t->kcov_mode);
- if (mode == KCOV_MODE_TRACE) {
- unsigned long *area;
- unsigned long pos;
- /*
- * There is some code that runs in interrupts but for which
- * in_interrupt() returns false (e.g. preempt_schedule_irq()).
- * READ_ONCE()/barrier() effectively provides load-acquire wrt
- * interrupts, there are paired barrier()/WRITE_ONCE() in
- * kcov_ioctl_locked().
- */
- barrier();
- area = t->kcov_area;
- /* The first word is number of subsequent PCs. */
- pos = READ_ONCE(area[0]) + 1;
- if (likely(pos < t->kcov_size)) {
- area[pos] = _RET_IP_;
- WRITE_ONCE(area[0], pos);
- }
+ area = t->kcov_area;
+ /* The first 64-bit word is the number of subsequent PCs. */
+ pos = READ_ONCE(area[0]) + 1;
+ if (likely(pos < t->kcov_size)) {
+ area[pos] = ip;
+ WRITE_ONCE(area[0], pos);
}
}
EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
+{
+ struct task_struct *t;
+ u64 *area;
+ u64 count, start_index, end_pos, max_pos;
+
+ t = current;
+ if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t))
+ return;
+
+ ip = canonicalize_ip(ip);
+
+ /*
+ * We write all comparison arguments and types as u64.
+ * The buffer was allocated for t->kcov_size unsigned longs.
+ */
+ area = (u64 *)t->kcov_area;
+ max_pos = t->kcov_size * sizeof(unsigned long);
+
+ count = READ_ONCE(area[0]);
+
+ /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */
+ start_index = 1 + count * KCOV_WORDS_PER_CMP;
+ end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64);
+ if (likely(end_pos <= max_pos)) {
+ area[start_index] = type;
+ area[start_index + 1] = arg1;
+ area[start_index + 2] = arg2;
+ area[start_index + 3] = ip;
+ WRITE_ONCE(area[0], count + 1);
+ }
+}
+
+void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1);
+
+void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
+
+void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4);
+
+void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8);
+
+void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1);
+
+void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
+
+void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4);
+
+void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2)
+{
+ write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2,
+ _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8);
+
+void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases)
+{
+ u64 i;
+ u64 count = cases[0];
+ u64 size = cases[1];
+ u64 type = KCOV_CMP_CONST;
+
+ switch (size) {
+ case 8:
+ type |= KCOV_CMP_SIZE(0);
+ break;
+ case 16:
+ type |= KCOV_CMP_SIZE(1);
+ break;
+ case 32:
+ type |= KCOV_CMP_SIZE(2);
+ break;
+ case 64:
+ type |= KCOV_CMP_SIZE(3);
+ break;
+ default:
+ return;
+ }
+ for (i = 0; i < count; i++)
+ write_comp_data(type, cases[i + 2], val, _RET_IP_);
+}
+EXPORT_SYMBOL(__sanitizer_cov_trace_switch);
+#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */
+
static void kcov_get(struct kcov *kcov)
{
atomic_inc(&kcov->refcount);
@@ -124,6 +261,7 @@
/* Just to not leave dangling references behind. */
kcov_task_init(t);
kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
spin_unlock(&kcov->lock);
kcov_put(kcov);
}
@@ -142,7 +280,7 @@
spin_lock(&kcov->lock);
size = kcov->size * sizeof(unsigned long);
- if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 ||
+ if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 ||
vma->vm_end - vma->vm_start != size) {
res = -EINVAL;
goto exit;
@@ -171,6 +309,7 @@
kcov = kzalloc(sizeof(*kcov), GFP_KERNEL);
if (!kcov)
return -ENOMEM;
+ kcov->mode = KCOV_MODE_DISABLED;
atomic_set(&kcov->refcount, 1);
spin_lock_init(&kcov->lock);
filep->private_data = kcov;
@@ -206,7 +345,7 @@
if (size < 2 || size > INT_MAX / sizeof(unsigned long))
return -EINVAL;
kcov->size = size;
- kcov->mode = KCOV_MODE_TRACE;
+ kcov->mode = KCOV_MODE_INIT;
return 0;
case KCOV_ENABLE:
/*
@@ -216,17 +355,25 @@
* at task exit or voluntary by KCOV_DISABLE. After that it can
* be enabled for another task.
*/
- unused = arg;
- if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED ||
- kcov->area == NULL)
+ if (kcov->mode != KCOV_MODE_INIT || !kcov->area)
return -EINVAL;
if (kcov->t != NULL)
return -EBUSY;
+ if (arg == KCOV_TRACE_PC)
+ kcov->mode = KCOV_MODE_TRACE_PC;
+ else if (arg == KCOV_TRACE_CMP)
+#ifdef CONFIG_KCOV_ENABLE_COMPARISONS
+ kcov->mode = KCOV_MODE_TRACE_CMP;
+#else
+ return -ENOTSUPP;
+#endif
+ else
+ return -EINVAL;
t = current;
/* Cache in task struct for performance. */
t->kcov_size = kcov->size;
t->kcov_area = kcov->area;
- /* See comment in __sanitizer_cov_trace_pc(). */
+ /* See comment in check_kcov_mode(). */
barrier();
WRITE_ONCE(t->kcov_mode, kcov->mode);
t->kcov = kcov;
@@ -244,6 +391,7 @@
return -EINVAL;
kcov_task_init(t);
kcov->t = NULL;
+ kcov->mode = KCOV_MODE_INIT;
kcov_put(kcov);
return 0;
default:
@@ -266,6 +414,7 @@
static const struct file_operations kcov_fops = {
.open = kcov_open,
.unlocked_ioctl = kcov_ioctl,
+ .compat_ioctl = kcov_ioctl,
.mmap = kcov_mmap,
.release = kcov_close,
};
diff --git a/kernel/module.c b/kernel/module.c
index 0e54d5b..07bfb99 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2817,6 +2817,15 @@
}
#endif /* CONFIG_LIVEPATCH */
+static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
+{
+ if (retpoline_module_ok(get_modinfo(info, "retpoline")))
+ return;
+
+ pr_warn("%s: loading module not compiled with retpoline compiler.\n",
+ mod->name);
+}
+
/* Sets info->hdr and info->len. */
static int copy_module_from_user(const void __user *umod, unsigned long len,
struct load_info *info)
@@ -2969,6 +2978,8 @@
add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
}
+ check_modinfo_retpoline(mod, info);
+
if (get_modinfo(info, "staging")) {
add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
pr_warn("%s: module is from the staging directory, the quality "
diff --git a/kernel/relay.c b/kernel/relay.c
index 8f18d31..2603e04 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -611,7 +611,6 @@
kref_put(&chan->kref, relay_destroy_channel);
mutex_unlock(&relay_channels_mutex);
- kfree(chan);
return NULL;
}
EXPORT_SYMBOL_GPL(relay_open);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3af7c66..b9d5c0d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5968,6 +5968,19 @@
call_rcu_sched(&old_rd->rcu, free_rootdomain);
}
+void sched_get_rd(struct root_domain *rd)
+{
+ atomic_inc(&rd->refcount);
+}
+
+void sched_put_rd(struct root_domain *rd)
+{
+ if (!atomic_dec_and_test(&rd->refcount))
+ return;
+
+ call_rcu_sched(&rd->rcu, free_rootdomain);
+}
+
static int init_rootdomain(struct root_domain *rd)
{
memset(rd, 0, sizeof(*rd));
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6c1f444..49815cc 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -447,13 +447,13 @@
*
* This function returns true if:
*
- * runtime / (deadline - t) > dl_runtime / dl_period ,
+ * runtime / (deadline - t) > dl_runtime / dl_deadline ,
*
* IOW we can't recycle current parameters.
*
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
* task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
*/
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, u64 t)
@@ -478,7 +478,7 @@
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
- left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+ left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
(pi_se->dl_runtime >> DL_SCALE);
@@ -507,10 +507,15 @@
}
}
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+ return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
/*
* If the entity depleted all its runtime, and if we want it to sleep
* while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
* and try to activate it.
*
* Notice that it is important for the caller to know if the timer
@@ -532,7 +537,7 @@
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
- act = ns_to_ktime(dl_se->deadline);
+ act = ns_to_ktime(dl_next_period(dl_se));
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
@@ -640,6 +645,7 @@
lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
rf.cookie = lockdep_pin_lock(&rq->lock);
+ update_rq_clock(rq);
/*
* Now that the task has been migrated to the new RQ and we
@@ -691,6 +697,39 @@
timer->function = dl_task_timer;
}
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadine < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the begin of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = dl_task_of(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+ if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+ dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+ return;
+ dl_se->dl_throttled = 1;
+ if (dl_se->runtime > 0)
+ dl_se->runtime = 0;
+ }
+}
+
static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
@@ -926,6 +965,11 @@
__dequeue_dl_entity(dl_se);
}
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_deadline < dl_se->dl_period;
+}
+
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -952,6 +996,15 @@
}
/*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+ dl_check_constrained_dl(&p->dl);
+
+ /*
* If p is throttled, we do nothing. In fact, if it exhausted
* its budget it needs a replenishment and, since it now is on
* its rq, the bandwidth timer callback (which clearly has not
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 33b8142..1f1a73c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5745,10 +5745,22 @@
static int compute_energy(struct energy_env *eenv)
{
struct cpumask visit_cpus;
+ int cpu_count;
WARN_ON(!eenv->sg_top->sge);
cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
+ /* If a cpu is hotplugged in while we are in this function,
+ * it does not appear in the existing visit_cpus mask
+ * which came from the sched_group pointer of the
+ * sched_domain pointed at by sd_ea for either the prev
+ * or next cpu and was dereferenced in __energy_diff.
+ * Since we will dereference sd_scs later as we iterate
+ * through the CPUs we expect to visit, new CPUs can
+ * be present which are not in the visit_cpus mask.
+ * Guard this with cpu_count.
+ */
+ cpu_count = cpumask_weight(&visit_cpus);
while (!cpumask_empty(&visit_cpus)) {
struct sched_group *sg_shared_cap = NULL;
@@ -5758,6 +5770,8 @@
/*
* Is the group utilization affected by cpus outside this
* sched_group?
+ * This sd may have groups with cpus which were not present
+ * when we took visit_cpus.
*/
sd = rcu_dereference(per_cpu(sd_scs, cpu));
if (sd && sd->parent)
@@ -5782,9 +5796,24 @@
eenv->sg = sg;
calc_sg_energy(eenv);
- /* remove CPUs we have just visited */
- if (!sd->child)
+ if (!sd->child) {
+ /*
+ * cpu_count here is the number of
+ * cpus we expect to visit in this
+ * calculation. If we race against
+ * hotplug, we can have extra cpus
+ * added to the groups we are
+ * iterating which do not appear in
+ * the visit_cpus mask. In that case
+ * we are not able to calculate energy
+ * without restarting so we will bail
+ * out and use prev_cpu this time.
+ */
+ if (!cpu_count)
+ return -EINVAL;
cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
+ cpu_count--;
+ }
if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
goto next_cpu;
@@ -5796,6 +5825,9 @@
* If we raced with hotplug and got an sd NULL-pointer;
* returning a wrong energy estimation is better than
* entering an infinite loop.
+ * Specifically: If a cpu is unplugged after we took
+ * the visit_cpus mask, it no longer has an sd_scs
+ * pointer, so when we dereference it, we get NULL.
*/
if (cpumask_test_cpu(cpu, &visit_cpus))
return -EINVAL;
@@ -6532,7 +6564,7 @@
* Due to large variance we need a large fuzz factor; hackbench in
* particularly is sensitive here.
*/
- if ((avg_idle / 512) < avg_cost)
+ if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
return -1;
time = local_clock();
@@ -10356,7 +10388,7 @@
int active_balance;
int cpu = task_cpu(p);
- if (rq->misfit_task) {
+ if (energy_aware() && rq->misfit_task) {
if (rq->curr->state != TASK_RUNNING ||
rq->curr->nr_cpus_allowed == 1)
return;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index a8fe53b..c7f1bdd 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -51,6 +51,11 @@
*/
SCHED_FEAT(TTWU_QUEUE, true)
+/*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+SCHED_FEAT(SIS_AVG_CPU, false)
+
#ifdef HAVE_RT_PUSH_IPI
/*
* In order to avoid a thundering herd attack of CPUs that are
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 925c6c9..cedc7c7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1952,9 +1952,8 @@
* the rt_loop_next will cause the iterator to perform another scan.
*
*/
-static int rto_next_cpu(struct rq *rq)
+static int rto_next_cpu(struct root_domain *rd)
{
- struct root_domain *rd = rq->rd;
int next;
int cpu;
@@ -2030,19 +2029,24 @@
* Otherwise it is finishing up and an ipi needs to be sent.
*/
if (rq->rd->rto_cpu < 0)
- cpu = rto_next_cpu(rq);
+ cpu = rto_next_cpu(rq->rd);
raw_spin_unlock(&rq->rd->rto_lock);
rto_start_unlock(&rq->rd->rto_loop_start);
- if (cpu >= 0)
+ if (cpu >= 0) {
+ /* Make sure the rd does not get freed while pushing */
+ sched_get_rd(rq->rd);
irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+ }
}
/* Called from hardirq context */
void rto_push_irq_work_func(struct irq_work *work)
{
+ struct root_domain *rd =
+ container_of(work, struct root_domain, rto_push_work);
struct rq *rq;
int cpu;
@@ -2058,18 +2062,20 @@
raw_spin_unlock(&rq->lock);
}
- raw_spin_lock(&rq->rd->rto_lock);
+ raw_spin_lock(&rd->rto_lock);
/* Pass the IPI to the next rt overloaded queue */
- cpu = rto_next_cpu(rq);
+ cpu = rto_next_cpu(rd);
- raw_spin_unlock(&rq->rd->rto_lock);
+ raw_spin_unlock(&rd->rto_lock);
- if (cpu < 0)
+ if (cpu < 0) {
+ sched_put_rd(rd);
return;
+ }
/* Try the next RT overloaded CPU */
- irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+ irq_work_queue_on(&rd->rto_push_work, cpu);
}
#endif /* HAVE_RT_PUSH_IPI */
@@ -2079,8 +2085,9 @@
bool resched = false;
struct task_struct *p;
struct rq *src_rq;
+ int rt_overload_count = rt_overloaded(this_rq);
- if (likely(!rt_overloaded(this_rq)))
+ if (likely(!rt_overload_count))
return;
/*
@@ -2089,6 +2096,11 @@
*/
smp_rmb();
+ /* If we are the only overloaded CPU do nothing */
+ if (rt_overload_count == 1 &&
+ cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
+ return;
+
#ifdef HAVE_RT_PUSH_IPI
if (sched_feat(RT_PUSH_IPI)) {
tell_cpu_to_push(this_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 646ff3c..fc77c45 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -610,6 +610,8 @@
};
extern struct root_domain def_root_domain;
+extern void sched_get_rd(struct root_domain *rd);
+extern void sched_put_rd(struct root_domain *rd);
#ifdef HAVE_RT_PUSH_IPI
extern void rto_push_irq_work_func(struct irq_work *work);
diff --git a/kernel/signal.c b/kernel/signal.c
index e48668c..7ebe236 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -72,7 +72,7 @@
handler = sig_handler(t, sig);
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
- handler == SIG_DFL && !force)
+ handler == SIG_DFL && !(force && sig_kernel_only(sig)))
return 1;
return sig_handler_ignored(handler, sig);
@@ -88,13 +88,15 @@
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
return 0;
- if (!sig_task_ignored(t, sig, force))
+ /*
+ * Tracers may want to know about even ignored signal unless it
+ * is SIGKILL which can't be reported anyway but can be ignored
+ * by SIGNAL_UNKILLABLE task.
+ */
+ if (t->ptrace && sig != SIGKILL)
return 0;
- /*
- * Tracers may want to know about even ignored signals.
- */
- return !t->ptrace;
+ return sig_task_ignored(t, sig, force);
}
/*
@@ -917,9 +919,9 @@
* then start taking the whole group down immediately.
*/
if (sig_fatal(p, sig) &&
- !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
+ !(signal->flags & SIGNAL_GROUP_EXIT) &&
!sigismember(&t->real_blocked, sig) &&
- (sig == SIGKILL || !t->ptrace)) {
+ (sig == SIGKILL || !p->ptrace)) {
/*
* This signal will be fatal to the whole group.
*/
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index eeb7f2f..54fd2fe 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,7 +652,9 @@
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
+ base->hang_detected = 0;
base->hres_active = 0;
+ base->next_timer = NULL;
}
/*
@@ -1610,6 +1612,7 @@
timerqueue_init_head(&cpu_base->clock_base[i].active);
}
+ cpu_base->active_bases = 0;
cpu_base->cpu = cpu;
hrtimer_init_hres(cpu_base);
return 0;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f2826c3..fc7c37a 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -507,17 +507,22 @@
{
struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
- (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
- !same_thread_group(rtn, current) ||
- (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+ switch (event->sigev_notify) {
+ case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+ rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+ if (!rtn || !same_thread_group(rtn, current))
+ return NULL;
+ /* FALLTHRU */
+ case SIGEV_SIGNAL:
+ case SIGEV_THREAD:
+ if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+ return NULL;
+ /* FALLTHRU */
+ case SIGEV_NONE:
+ return task_pid(rtn);
+ default:
return NULL;
-
- if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
- return NULL;
-
- return task_pid(rtn);
+ }
}
void posix_timers_register_clock(const clockid_t clock_id,
@@ -745,8 +750,7 @@
/* interval timer ? */
if (iv.tv64)
cur_setting->it_interval = ktime_to_timespec(iv);
- else if (!hrtimer_active(timer) &&
- (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+ else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE)
return;
now = timer->base->get_time();
@@ -757,7 +761,7 @@
* expiry is > now.
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
- (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+ timr->it_sigev_notify == SIGEV_NONE))
timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
remaining = __hrtimer_expires_remaining_adjusted(timer, now);
@@ -767,7 +771,7 @@
* A single shot SIGEV_NONE timer must return 0, when
* it is expired !
*/
- if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+ if (timr->it_sigev_notify != SIGEV_NONE)
cur_setting->it_value.tv_nsec = 1;
} else
cur_setting->it_value = ktime_to_timespec(remaining);
@@ -865,7 +869,7 @@
timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
/* SIGEV_NONE timers are not queued ! See common_timer_get */
- if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+ if (timr->it_sigev_notify == SIGEV_NONE) {
/* Setup correct expiry time for relative timers */
if (mode == HRTIMER_MODE_REL) {
hrtimer_add_expires(timer, timer->base->get_time());
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3801594..6a93da8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -663,6 +663,11 @@
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
+static inline bool local_timer_softirq_pending(void)
+{
+ return local_softirq_pending() & TIMER_SOFTIRQ;
+}
+
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
@@ -679,8 +684,18 @@
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
- if (rcu_needs_cpu(basemono, &next_rcu) ||
- arch_needs_cpu() || irq_work_needs_cpu()) {
+ /*
+ * Keep the periodic tick, when RCU, architecture or irq_work
+ * requests it.
+ * Aside of that check whether the local timer softirq is
+ * pending. If so its a bad idea to call get_next_timer_interrupt()
+ * because there is an already expired timer, so it will request
+ * immeditate expiry, which rearms the hardware timer with a
+ * minimal delta which brings us back to this place
+ * immediately. Lather, rinse and repeat...
+ */
+ if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+ irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 17c0d14..427e33d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -636,9 +636,7 @@
tk->ktime_sec = seconds;
/* Update the monotonic raw base */
- seconds = tk->raw_sec;
- nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
- tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/* must hold timekeeper_lock */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 7d67036..2d5cc7d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -849,11 +849,10 @@
struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
/*
- * If the timer is deferrable and nohz is active then we need to use
- * the deferrable base.
+ * If the timer is deferrable and NO_HZ_COMMON is set then we need
+ * to use the deferrable base.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
- (tflags & TIMER_DEFERRABLE))
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
return base;
}
@@ -863,11 +862,10 @@
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
/*
- * If the timer is deferrable and nohz is active then we need to use
- * the deferrable base.
+ * If the timer is deferrable and NO_HZ_COMMON is set then we need
+ * to use the deferrable base.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
- (tflags & TIMER_DEFERRABLE))
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
base = this_cpu_ptr(&timer_bases[BASE_DEF]);
return base;
}
@@ -1021,8 +1019,6 @@
if (!ret && pending_only)
goto out_unlock;
- debug_activate(timer, expires);
-
new_base = get_target_base(base, timer->flags);
if (base != new_base) {
@@ -1046,6 +1042,8 @@
}
}
+ debug_activate(timer, expires);
+
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@
base->must_forward_clk = false;
__run_timers(base);
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
}
@@ -1698,7 +1696,7 @@
hrtimer_run_queues();
/* Raise the softirq only if required. */
if (time_before(jiffies, base->clk)) {
- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
return;
/* CPU is awake, so check the deferrable base. */
base++;
@@ -1853,6 +1851,21 @@
}
}
+int timers_prepare_cpu(unsigned int cpu)
+{
+ struct timer_base *base;
+ int b;
+
+ for (b = 0; b < NR_BASES; b++) {
+ base = per_cpu_ptr(&timer_bases[b], cpu);
+ base->clk = jiffies;
+ base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->is_idle = false;
+ base->must_forward_clk = true;
+ }
+ return 0;
+}
+
int timers_dead_cpu(unsigned int cpu)
{
struct timer_base *old_base;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b8d718..2884fe0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3911,7 +3911,6 @@
func_g.type = filter_parse_regex(glob, strlen(glob),
&func_g.search, ¬);
func_g.len = strlen(func_g.search);
- func_g.search = glob;
/* we do not support '!' for function probes */
if (WARN_ON(not))
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f5c016e..3e1d11f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@
/* Missed count stored at end */
#define RB_MISSED_STORED (1 << 30)
+#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
+
struct buffer_data_page {
u64 time_stamp; /* page time stamp */
local_t commit; /* write committed index */
@@ -331,7 +333,9 @@
*/
size_t ring_buffer_page_len(void *page)
{
- return local_read(&((struct buffer_data_page *)page)->commit)
+ struct buffer_data_page *bpage = page;
+
+ return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
+ BUF_PAGE_HDR_SIZE;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b782c84..7590d6a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3773,37 +3773,30 @@
.llseek = seq_lseek,
};
-/*
- * The tracer itself will not take this lock, but still we want
- * to provide a consistent cpumask to user-space:
- */
-static DEFINE_MUTEX(tracing_cpumask_update_lock);
-
-/*
- * Temporary storage for the character representation of the
- * CPU bitmask (and one more byte for the newline):
- */
-static char mask_str[NR_CPUS + 1];
-
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
struct trace_array *tr = file_inode(filp)->i_private;
+ char *mask_str;
int len;
- mutex_lock(&tracing_cpumask_update_lock);
+ len = snprintf(NULL, 0, "%*pb\n",
+ cpumask_pr_args(tr->tracing_cpumask)) + 1;
+ mask_str = kmalloc(len, GFP_KERNEL);
+ if (!mask_str)
+ return -ENOMEM;
- len = snprintf(mask_str, count, "%*pb\n",
+ len = snprintf(mask_str, len, "%*pb\n",
cpumask_pr_args(tr->tracing_cpumask));
if (len >= count) {
count = -EINVAL;
goto out_err;
}
- count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+ count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
out_err:
- mutex_unlock(&tracing_cpumask_update_lock);
+ kfree(mask_str);
return count;
}
@@ -3823,8 +3816,6 @@
if (err)
goto err_unlock;
- mutex_lock(&tracing_cpumask_update_lock);
-
local_irq_disable();
arch_spin_lock(&tr->max_lock);
for_each_tracing_cpu(cpu) {
@@ -3847,8 +3838,6 @@
local_irq_enable();
cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
-
- mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
return count;
@@ -6282,7 +6271,7 @@
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
- int entries, size, i;
+ int entries, i;
ssize_t ret = 0;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6333,14 +6322,6 @@
break;
}
- /*
- * zero out any left over data, this is going to
- * user land.
- */
- size = ring_buffer_page_len(ref->page);
- if (size < PAGE_SIZE)
- memset(ref->page + size, 0, PAGE_SIZE - size);
-
page = virt_to_page(ref->page);
spd.pages[i] = page;
@@ -7064,6 +7045,7 @@
buf->data = alloc_percpu(struct trace_array_cpu);
if (!buf->data) {
ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
return -ENOMEM;
}
@@ -7087,7 +7069,9 @@
allocate_snapshot ? size : 1);
if (WARN_ON(ret)) {
ring_buffer_free(tr->trace_buffer.buffer);
+ tr->trace_buffer.buffer = NULL;
free_percpu(tr->trace_buffer.data);
+ tr->trace_buffer.data = NULL;
return -ENOMEM;
}
tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 03c0a48..9549ed1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2200,6 +2200,7 @@
{
struct trace_event_call *call, *p;
const char *last_system = NULL;
+ bool first = false;
int last_i;
int i;
@@ -2207,15 +2208,28 @@
list_for_each_entry_safe(call, p, &ftrace_events, list) {
/* events are usually grouped together with systems */
if (!last_system || call->class->system != last_system) {
+ first = true;
last_i = 0;
last_system = call->class->system;
}
+ /*
+ * Since calls are grouped by systems, the likelyhood that the
+ * next call in the iteration belongs to the same system as the
+ * previous call is high. As an optimization, we skip seaching
+ * for a map[] that matches the call's system if the last call
+ * was from the same system. That's what last_i is for. If the
+ * call has the same system as the previous call, then last_i
+ * will be the index of the first map[] that has a matching
+ * system.
+ */
for (i = last_i; i < len; i++) {
if (call->class->system == map[i]->system) {
/* Save the first system if need be */
- if (!last_i)
+ if (first) {
last_i = i;
+ first = false;
+ }
update_event_printk(call, map[i]);
}
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index f3a960e..0664044 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -449,7 +449,7 @@
}
field = trace_find_event_field(file->event_call, field_name);
- if (!field) {
+ if (!field || !field->size) {
ret = -EINVAL;
goto out;
}
@@ -547,7 +547,7 @@
}
field = trace_find_event_field(file->event_call, field_name);
- if (!field) {
+ if (!field || !field->size) {
ret = -EINVAL;
goto out;
}
diff --git a/kernel/uid16.c b/kernel/uid16.c
index cc40793..dcffcce 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -190,6 +190,7 @@
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 296dcca..ebfea5f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
+#include <linux/nmi.h>
#include "workqueue_internal.h"
@@ -1506,6 +1507,7 @@
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq);
WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork);
WARN_ON_ONCE(timer_pending(timer));
@@ -4423,6 +4425,12 @@
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
show_pwq(pwq);
spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
}
}
@@ -4450,6 +4458,12 @@
pr_cont("\n");
next_pool:
spin_unlock_irqrestore(&pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
}
rcu_read_unlock_sched();
diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c
index 1ef0cec..dc14bea 100644
--- a/lib/asn1_decoder.c
+++ b/lib/asn1_decoder.c
@@ -313,42 +313,47 @@
/* Decide how to handle the operation */
switch (op) {
- case ASN1_OP_MATCH_ANY_ACT:
- case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
- case ASN1_OP_COND_MATCH_ANY_ACT:
- case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
- ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len);
- if (ret < 0)
- return ret;
- goto skip_data;
-
- case ASN1_OP_MATCH_ACT:
- case ASN1_OP_MATCH_ACT_OR_SKIP:
- case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
- ret = actions[machine[pc + 2]](context, hdr, tag, data + dp, len);
- if (ret < 0)
- return ret;
- goto skip_data;
-
case ASN1_OP_MATCH:
case ASN1_OP_MATCH_OR_SKIP:
+ case ASN1_OP_MATCH_ACT:
+ case ASN1_OP_MATCH_ACT_OR_SKIP:
case ASN1_OP_MATCH_ANY:
case ASN1_OP_MATCH_ANY_OR_SKIP:
+ case ASN1_OP_MATCH_ANY_ACT:
+ case ASN1_OP_MATCH_ANY_ACT_OR_SKIP:
case ASN1_OP_COND_MATCH_OR_SKIP:
+ case ASN1_OP_COND_MATCH_ACT_OR_SKIP:
case ASN1_OP_COND_MATCH_ANY:
case ASN1_OP_COND_MATCH_ANY_OR_SKIP:
- skip_data:
+ case ASN1_OP_COND_MATCH_ANY_ACT:
+ case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP:
+
if (!(flags & FLAG_CONS)) {
if (flags & FLAG_INDEFINITE_LENGTH) {
+ size_t tmp = dp;
+
ret = asn1_find_indefinite_length(
- data, datalen, &dp, &len, &errmsg);
+ data, datalen, &tmp, &len, &errmsg);
if (ret < 0)
goto error;
- } else {
- dp += len;
}
pr_debug("- LEAF: %zu\n", len);
}
+
+ if (op & ASN1_OP_MATCH__ACT) {
+ unsigned char act;
+
+ if (op & ASN1_OP_MATCH__ANY)
+ act = machine[pc + 1];
+ else
+ act = machine[pc + 2];
+ ret = actions[act](context, hdr, tag, data + dp, len);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (!(flags & FLAG_CONS))
+ dp += len;
pc += asn1_op_lengths[op];
goto next_op;
@@ -434,6 +439,8 @@
else
act = machine[pc + 1];
ret = actions[act](context, hdr, 0, data + tdp, len);
+ if (ret < 0)
+ return ret;
}
pc += asn1_op_lengths[op];
goto next_op;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index da796e2..c7c96bc 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -360,6 +360,10 @@
if (parse_lineno(last, &query->last_lineno) < 0)
return -EINVAL;
+ /* special case for last lineno not specified */
+ if (query->last_lineno == 0)
+ query->last_lineno = UINT_MAX;
+
if (query->last_lineno < query->first_lineno) {
pr_err("last-line:%d < 1st-line:%d\n",
query->last_lineno,
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 144fe6b..ca06adc 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -194,7 +194,7 @@
chunk->phys_addr = phys;
chunk->start_addr = virt;
chunk->end_addr = virt + size - 1;
- atomic_set(&chunk->avail, size);
+ atomic_long_set(&chunk->avail, size);
spin_lock(&pool->lock);
list_add_rcu(&chunk->next_chunk, &pool->chunks);
@@ -304,7 +304,7 @@
nbits = (size + (1UL << order) - 1) >> order;
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
- if (size > atomic_read(&chunk->avail))
+ if (size > atomic_long_read(&chunk->avail))
continue;
start_bit = 0;
@@ -324,7 +324,7 @@
addr = chunk->start_addr + ((unsigned long)start_bit << order);
size = nbits << order;
- atomic_sub(size, &chunk->avail);
+ atomic_long_sub(size, &chunk->avail);
break;
}
rcu_read_unlock();
@@ -390,7 +390,7 @@
remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
BUG_ON(remain);
size = nbits << order;
- atomic_add(size, &chunk->avail);
+ atomic_long_add(size, &chunk->avail);
rcu_read_unlock();
return;
}
@@ -464,7 +464,7 @@
rcu_read_lock();
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
- avail += atomic_read(&chunk->avail);
+ avail += atomic_long_read(&chunk->avail);
rcu_read_unlock();
return avail;
}
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 2e38502..98da752 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5646,9 +5646,8 @@
return NULL;
}
}
- /* We don't expect to fail. */
if (*err) {
- pr_cont("FAIL to attach err=%d len=%d\n",
+ pr_cont("FAIL to prog_create err=%d len=%d\n",
*err, fprog.len);
return NULL;
}
@@ -5671,6 +5670,10 @@
* checks.
*/
fp = bpf_prog_select_runtime(fp, err);
+ if (*err) {
+ pr_cont("FAIL to select_runtime err=%d\n", *err);
+ return NULL;
+ }
break;
}
@@ -5856,8 +5859,8 @@
pass_cnt++;
continue;
}
-
- return err;
+ err_cnt++;
+ continue;
}
pr_cont("jited:%u ", fp->jited);
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index fbdf879..0e70ecc 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -19,6 +19,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/module.h>
+#include <linux/kasan.h>
/*
* Note: test functions are marked noinline so that their names appear in
@@ -441,6 +442,12 @@
static int __init kmalloc_tests_init(void)
{
+ /*
+ * Temporarily enable multi-shot mode. Otherwise, we'd only get a
+ * report for the first case.
+ */
+ bool multishot = kasan_save_enable_multi_shot();
+
kmalloc_oob_right();
kmalloc_oob_left();
kmalloc_node_oob_right();
@@ -465,6 +472,9 @@
ksize_unpoisons_memory();
copy_user_test();
use_after_scope_test();
+
+ kasan_restore_multi_shot(multishot);
+
return -EAGAIN;
}
diff --git a/lib/ubsan.c b/lib/ubsan.c
index fb0409d..50d1d5c 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -265,14 +265,14 @@
}
EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
-static void handle_null_ptr_deref(struct type_mismatch_data *data)
+static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
{
unsigned long flags;
- if (suppress_report(&data->location))
+ if (suppress_report(data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(data->location, &flags);
pr_err("%s null pointer of type %s\n",
type_check_kinds[data->type_check_kind],
@@ -281,15 +281,15 @@
ubsan_epilogue(&flags);
}
-static void handle_missaligned_access(struct type_mismatch_data *data,
+static void handle_misaligned_access(struct type_mismatch_data_common *data,
unsigned long ptr)
{
unsigned long flags;
- if (suppress_report(&data->location))
+ if (suppress_report(data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(data->location, &flags);
pr_err("%s misaligned address %p for type %s\n",
type_check_kinds[data->type_check_kind],
@@ -299,15 +299,15 @@
ubsan_epilogue(&flags);
}
-static void handle_object_size_mismatch(struct type_mismatch_data *data,
+static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
unsigned long ptr)
{
unsigned long flags;
- if (suppress_report(&data->location))
+ if (suppress_report(data->location))
return;
- ubsan_prologue(&data->location, &flags);
+ ubsan_prologue(data->location, &flags);
pr_err("%s address %p with insufficient space\n",
type_check_kinds[data->type_check_kind],
(void *) ptr);
@@ -315,19 +315,47 @@
ubsan_epilogue(&flags);
}
-void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
+static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
unsigned long ptr)
{
if (!ptr)
handle_null_ptr_deref(data);
else if (data->alignment && !IS_ALIGNED(ptr, data->alignment))
- handle_missaligned_access(data, ptr);
+ handle_misaligned_access(data, ptr);
else
handle_object_size_mismatch(data, ptr);
}
+
+void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
+ unsigned long ptr)
+{
+ struct type_mismatch_data_common common_data = {
+ .location = &data->location,
+ .type = data->type,
+ .alignment = data->alignment,
+ .type_check_kind = data->type_check_kind
+ };
+
+ ubsan_type_mismatch_common(&common_data, ptr);
+}
EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
+void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
+ unsigned long ptr)
+{
+
+ struct type_mismatch_data_common common_data = {
+ .location = &data->location,
+ .type = data->type,
+ .alignment = 1UL << data->log_alignment,
+ .type_check_kind = data->type_check_kind
+ };
+
+ ubsan_type_mismatch_common(&common_data, ptr);
+}
+EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
+
void __ubsan_handle_nonnull_return(struct nonnull_return_data *data)
{
unsigned long flags;
diff --git a/lib/ubsan.h b/lib/ubsan.h
index b2d18d4..d8b8085 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -36,6 +36,20 @@
unsigned char type_check_kind;
};
+struct type_mismatch_data_v1 {
+ struct source_location location;
+ struct type_descriptor *type;
+ unsigned char log_alignment;
+ unsigned char type_check_kind;
+};
+
+struct type_mismatch_data_common {
+ struct source_location *location;
+ struct type_descriptor *type;
+ unsigned long alignment;
+ unsigned char type_check_kind;
+};
+
struct nonnull_arg_data {
struct source_location location;
struct source_location attr_location;
diff --git a/mm/cma.c b/mm/cma.c
index c960459..397687f 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -54,7 +54,7 @@
}
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
if (align_order <= cma->order_per_bit)
return 0;
@@ -62,17 +62,14 @@
}
/*
- * Find a PFN aligned to the specified order and return an offset represented in
- * order_per_bits.
+ * Find the offset of the base PFN from the specified align_order.
+ * The value returned is represented in order_per_bits.
*/
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
- if (align_order <= cma->order_per_bit)
- return 0;
-
- return (ALIGN(cma->base_pfn, (1UL << align_order))
- - cma->base_pfn) >> cma->order_per_bit;
+ return (cma->base_pfn & ((1UL << align_order) - 1))
+ >> cma->order_per_bit;
}
static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8258e9e..c234c07 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -745,20 +745,15 @@
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd)
+ pmd_t *pmd, int flags)
{
pmd_t _pmd;
- /*
- * We should set the dirty bit only for FOLL_WRITE but for now
- * the dirty bit in the pmd is meaningless. And if the dirty
- * bit will become meaningful and we'll only set it with
- * FOLL_WRITE, an atomic set_bit will be required on the pmd to
- * set the young bit, instead of the current set_pmd_at.
- */
- _pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
+ _pmd = pmd_mkyoung(*pmd);
+ if (flags & FOLL_WRITE)
+ _pmd = pmd_mkdirty(_pmd);
if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
- pmd, _pmd, 1))
+ pmd, _pmd, flags & FOLL_WRITE))
update_mmu_cache_pmd(vma, addr, pmd);
}
@@ -787,7 +782,7 @@
return NULL;
if (flags & FOLL_TOUCH)
- touch_pmd(vma, addr, pmd);
+ touch_pmd(vma, addr, pmd, flags);
/*
* device mapped pages can only be returned if the
@@ -1158,7 +1153,7 @@
page = pmd_page(*pmd);
VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
if (flags & FOLL_TOUCH)
- touch_pmd(vma, addr, pmd);
+ touch_pmd(vma, addr, pmd, flags);
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
/*
* We don't mlock() pte-mapped THPs. This way we can avoid
@@ -1514,37 +1509,69 @@
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
- int ret = 0;
+ pmd_t entry;
+ bool preserve_write;
+ int ret;
ptl = __pmd_trans_huge_lock(pmd, vma);
- if (ptl) {
- pmd_t entry;
- bool preserve_write = prot_numa && pmd_write(*pmd);
- ret = 1;
+ if (!ptl)
+ return 0;
- /*
- * Avoid trapping faults against the zero page. The read-only
- * data is likely to be read-cached on the local CPU and
- * local/remote hits to the zero page are not interesting.
- */
- if (prot_numa && is_huge_zero_pmd(*pmd)) {
- spin_unlock(ptl);
- return ret;
- }
+ preserve_write = prot_numa && pmd_write(*pmd);
+ ret = 1;
- if (!prot_numa || !pmd_protnone(*pmd)) {
- entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
- entry = pmd_modify(entry, newprot);
- if (preserve_write)
- entry = pmd_mkwrite(entry);
- ret = HPAGE_PMD_NR;
- set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
- pmd_write(entry));
- }
- spin_unlock(ptl);
- }
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (prot_numa && is_huge_zero_pmd(*pmd))
+ goto unlock;
+ if (prot_numa && pmd_protnone(*pmd))
+ goto unlock;
+
+ /*
+ * In case prot_numa, we are under down_read(mmap_sem). It's critical
+ * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+ * which is also under down_read(mmap_sem):
+ *
+ * CPU0: CPU1:
+ * change_huge_pmd(prot_numa=1)
+ * pmdp_huge_get_and_clear_notify()
+ * madvise_dontneed()
+ * zap_pmd_range()
+ * pmd_trans_huge(*pmd) == 0 (without ptl)
+ * // skip the pmd
+ * set_pmd_at();
+ * // pmd is re-established
+ *
+ * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+ * which may break userspace.
+ *
+ * pmdp_invalidate() is required to make sure we don't miss
+ * dirty/young flags set by hardware.
+ */
+ entry = *pmd;
+ pmdp_invalidate(vma, addr, pmd);
+
+ /*
+ * Recover dirty/young flags. It relies on pmdp_invalidate to not
+ * corrupt them.
+ */
+ if (pmd_dirty(*pmd))
+ entry = pmd_mkdirty(entry);
+ if (pmd_young(*pmd))
+ entry = pmd_mkyoung(entry);
+
+ entry = pmd_modify(entry, newprot);
+ if (preserve_write)
+ entry = pmd_mkwrite(entry);
+ ret = HPAGE_PMD_NR;
+ set_pmd_at(mm, addr, pmd, entry);
+ BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+ spin_unlock(ptl);
return ret;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65c36ac..6ff65c4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3135,6 +3135,13 @@
}
}
+static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+{
+ if (addr & ~(huge_page_mask(hstate_vma(vma))))
+ return -EINVAL;
+ return 0;
+}
+
/*
* We cannot handle pagefaults against hugetlb pages at all. They cause
* handle_mm_fault() to try to instantiate regular-sized pages in the
@@ -3151,6 +3158,7 @@
.fault = hugetlb_vm_op_fault,
.open = hugetlb_vm_op_open,
.close = hugetlb_vm_op_close,
+ .split = hugetlb_vm_op_split,
};
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
diff --git a/mm/internal.h b/mm/internal.h
index 34a5459..3e2d016 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -74,6 +74,12 @@
extern unsigned long highest_memmap_pfn;
/*
+ * Maximum number of reclaim retries without progress before the OOM
+ * killer is consider the only way forward.
+ */
+#define MAX_RECLAIM_RETRIES 16
+
+/*
* in mm/vmscan.c:
*/
extern int isolate_lru_page(struct page *page);
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 0e9505f..7a6d1a1 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -39,6 +39,16 @@
#include "kasan.h"
#include "../slab.h"
+void kasan_enable_current(void)
+{
+ current->kasan_depth++;
+}
+
+void kasan_disable_current(void)
+{
+ current->kasan_depth--;
+}
+
/*
* Poisons the shadow memory for 'size' bytes starting from 'addr'.
* Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
@@ -428,7 +438,7 @@
quarantine_remove_cache(cache);
}
-void kasan_cache_destroy(struct kmem_cache *cache)
+void kasan_cache_shutdown(struct kmem_cache *cache)
{
quarantine_remove_cache(cache);
}
@@ -559,7 +569,8 @@
shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) {
- kasan_report_double_free(cache, object, shadow_byte);
+ kasan_report_double_free(cache, object,
+ __builtin_return_address(1));
return true;
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 1c260e6..1229298 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -96,15 +96,10 @@
<< KASAN_SHADOW_SCALE_SHIFT);
}
-static inline bool kasan_report_enabled(void)
-{
- return !current->kasan_depth;
-}
-
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
void kasan_report_double_free(struct kmem_cache *cache, void *object,
- s8 shadow);
+ void *ip);
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB)
void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index baabaad..3a8ddf8 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -25,6 +25,7 @@
#include <linux/printk.h>
#include <linux/shrinker.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -86,24 +87,9 @@
qlist_init(from);
}
-static void qlist_move(struct qlist_head *from, struct qlist_node *last,
- struct qlist_head *to, size_t size)
-{
- if (unlikely(last == from->tail)) {
- qlist_move_all(from, to);
- return;
- }
- if (qlist_empty(to))
- to->head = from->head;
- else
- to->tail->next = from->head;
- to->tail = last;
- from->head = last->next;
- last->next = NULL;
- from->bytes -= size;
- to->bytes += size;
-}
-
+#define QUARANTINE_PERCPU_SIZE (1 << 20)
+#define QUARANTINE_BATCHES \
+ (1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS)
/*
* The object quarantine consists of per-cpu queues and a global queue,
@@ -111,11 +97,23 @@
*/
static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine);
-static struct qlist_head global_quarantine;
+/* Round-robin FIFO array of batches. */
+static struct qlist_head global_quarantine[QUARANTINE_BATCHES];
+static int quarantine_head;
+static int quarantine_tail;
+/* Total size of all objects in global_quarantine across all batches. */
+static unsigned long quarantine_size;
static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
/* Maximum size of the global queue. */
-static unsigned long quarantine_size;
+static unsigned long quarantine_max_size;
+
+/*
+ * Target size of a batch in global_quarantine.
+ * Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM.
+ */
+static unsigned long quarantine_batch_size;
/*
* The fraction of physical memory the quarantine is allowed to occupy.
@@ -124,9 +122,6 @@
*/
#define QUARANTINE_FRACTION 32
-#define QUARANTINE_LOW_SIZE (READ_ONCE(quarantine_size) * 3 / 4)
-#define QUARANTINE_PERCPU_SIZE (1 << 20)
-
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
return virt_to_head_page(qlink)->slab_cache;
@@ -180,62 +175,89 @@
struct qlist_head *q;
struct qlist_head temp = QLIST_INIT;
+ /*
+ * Note: irq must be disabled until after we move the batch to the
+ * global quarantine. Otherwise quarantine_remove_cache() can miss
+ * some objects belonging to the cache if they are in our local temp
+ * list. quarantine_remove_cache() executes on_each_cpu() at the
+ * beginning which ensures that it either sees the objects in per-cpu
+ * lists or in the global quarantine.
+ */
local_irq_save(flags);
q = this_cpu_ptr(&cpu_quarantine);
qlist_put(q, &info->quarantine_link, cache->size);
- if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+ if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
qlist_move_all(q, &temp);
- local_irq_restore(flags);
+ spin_lock(&quarantine_lock);
+ WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
+ qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
+ if (global_quarantine[quarantine_tail].bytes >=
+ READ_ONCE(quarantine_batch_size)) {
+ int new_tail;
- if (unlikely(!qlist_empty(&temp))) {
- spin_lock_irqsave(&quarantine_lock, flags);
- qlist_move_all(&temp, &global_quarantine);
- spin_unlock_irqrestore(&quarantine_lock, flags);
+ new_tail = quarantine_tail + 1;
+ if (new_tail == QUARANTINE_BATCHES)
+ new_tail = 0;
+ if (new_tail != quarantine_head)
+ quarantine_tail = new_tail;
+ }
+ spin_unlock(&quarantine_lock);
}
+
+ local_irq_restore(flags);
}
void quarantine_reduce(void)
{
- size_t new_quarantine_size, percpu_quarantines;
+ size_t total_size, new_quarantine_size, percpu_quarantines;
unsigned long flags;
+ int srcu_idx;
struct qlist_head to_free = QLIST_INIT;
- size_t size_to_free = 0;
- struct qlist_node *last;
- if (likely(READ_ONCE(global_quarantine.bytes) <=
- READ_ONCE(quarantine_size)))
+ if (likely(READ_ONCE(quarantine_size) <=
+ READ_ONCE(quarantine_max_size)))
return;
+ /*
+ * srcu critical section ensures that quarantine_remove_cache()
+ * will not miss objects belonging to the cache while they are in our
+ * local to_free list. srcu is chosen because (1) it gives us private
+ * grace period domain that does not interfere with anything else,
+ * and (2) it allows synchronize_srcu() to return without waiting
+ * if there are no pending read critical sections (which is the
+ * expected case).
+ */
+ srcu_idx = srcu_read_lock(&remove_cache_srcu);
spin_lock_irqsave(&quarantine_lock, flags);
/*
* Update quarantine size in case of hotplug. Allocate a fraction of
* the installed memory to quarantine minus per-cpu queue limits.
*/
- new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
+ total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
QUARANTINE_FRACTION;
percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
- new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
- 0 : new_quarantine_size - percpu_quarantines;
- WRITE_ONCE(quarantine_size, new_quarantine_size);
+ new_quarantine_size = (total_size < percpu_quarantines) ?
+ 0 : total_size - percpu_quarantines;
+ WRITE_ONCE(quarantine_max_size, new_quarantine_size);
+ /* Aim at consuming at most 1/2 of slots in quarantine. */
+ WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE,
+ 2 * total_size / QUARANTINE_BATCHES));
- last = global_quarantine.head;
- while (last) {
- struct kmem_cache *cache = qlink_to_cache(last);
-
- size_to_free += cache->size;
- if (!last->next || size_to_free >
- global_quarantine.bytes - QUARANTINE_LOW_SIZE)
- break;
- last = last->next;
+ if (likely(quarantine_size > quarantine_max_size)) {
+ qlist_move_all(&global_quarantine[quarantine_head], &to_free);
+ WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes);
+ quarantine_head++;
+ if (quarantine_head == QUARANTINE_BATCHES)
+ quarantine_head = 0;
}
- qlist_move(&global_quarantine, last, &to_free, size_to_free);
spin_unlock_irqrestore(&quarantine_lock, flags);
qlist_free_all(&to_free, NULL);
+ srcu_read_unlock(&remove_cache_srcu, srcu_idx);
}
static void qlist_move_cache(struct qlist_head *from,
@@ -273,16 +295,34 @@
qlist_free_all(&to_free, cache);
}
+/* Free all quarantined objects belonging to cache. */
void quarantine_remove_cache(struct kmem_cache *cache)
{
- unsigned long flags;
+ unsigned long flags, i;
struct qlist_head to_free = QLIST_INIT;
+ /*
+ * Must be careful to not miss any objects that are being moved from
+ * per-cpu list to the global quarantine in quarantine_put(),
+ * nor objects being freed in quarantine_reduce(). on_each_cpu()
+ * achieves the first goal, while synchronize_srcu() achieves the
+ * second.
+ */
on_each_cpu(per_cpu_remove_cache, cache, 1);
spin_lock_irqsave(&quarantine_lock, flags);
- qlist_move_cache(&global_quarantine, &to_free, cache);
+ for (i = 0; i < QUARANTINE_BATCHES; i++) {
+ if (qlist_empty(&global_quarantine[i]))
+ continue;
+ qlist_move_cache(&global_quarantine[i], &to_free, cache);
+ /* Scanning whole quarantine can take a while. */
+ spin_unlock_irqrestore(&quarantine_lock, flags);
+ cond_resched();
+ spin_lock_irqsave(&quarantine_lock, flags);
+ }
spin_unlock_irqrestore(&quarantine_lock, flags);
qlist_free_all(&to_free, cache);
+
+ synchronize_srcu(&remove_cache_srcu);
}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 8ca412a..04bb1d3 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,7 +13,9 @@
*
*/
+#include <linux/bitops.h>
#include <linux/ftrace.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -49,7 +51,13 @@
return first_bad_addr;
}
-static void print_error_description(struct kasan_access_info *info)
+static bool addr_has_shadow(struct kasan_access_info *info)
+{
+ return (info->access_addr >=
+ kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+}
+
+static const char *get_shadow_bug_type(struct kasan_access_info *info)
{
const char *bug_type = "unknown-crash";
u8 *shadow_addr;
@@ -96,12 +104,39 @@
break;
}
- pr_err("BUG: KASAN: %s in %pS at addr %p\n",
- bug_type, (void *)info->ip,
- info->access_addr);
- pr_err("%s of size %zu by task %s/%d\n",
- info->is_write ? "Write" : "Read",
- info->access_size, current->comm, task_pid_nr(current));
+ return bug_type;
+}
+
+static const char *get_wild_bug_type(struct kasan_access_info *info)
+{
+ const char *bug_type = "unknown-crash";
+
+ if ((unsigned long)info->access_addr < PAGE_SIZE)
+ bug_type = "null-ptr-deref";
+ else if ((unsigned long)info->access_addr < TASK_SIZE)
+ bug_type = "user-memory-access";
+ else
+ bug_type = "wild-memory-access";
+
+ return bug_type;
+}
+
+static const char *get_bug_type(struct kasan_access_info *info)
+{
+ if (addr_has_shadow(info))
+ return get_shadow_bug_type(info);
+ return get_wild_bug_type(info);
+}
+
+static void print_error_description(struct kasan_access_info *info)
+{
+ const char *bug_type = get_bug_type(info);
+
+ pr_err("BUG: KASAN: %s in %pS\n",
+ bug_type, (void *)info->ip);
+ pr_err("%s of size %zu at addr %p by task %s/%d\n",
+ info->is_write ? "Write" : "Read", info->access_size,
+ info->access_addr, current->comm, task_pid_nr(current));
}
static inline bool kernel_or_module_addr(const void *addr)
@@ -137,12 +172,14 @@
pr_err("==================================================================\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
spin_unlock_irqrestore(&report_lock, *flags);
+ if (panic_on_warn)
+ panic("panic_on_warn set ...\n");
kasan_enable_current();
}
-static void print_track(struct kasan_track *track)
+static void print_track(struct kasan_track *track, const char *prefix)
{
- pr_err("PID = %u\n", track->pid);
+ pr_err("%s by task %u:\n", prefix, track->pid);
if (track->stack) {
struct stack_trace trace;
@@ -153,59 +190,84 @@
}
}
-static void kasan_object_err(struct kmem_cache *cache, void *object)
+static struct page *addr_to_page(const void *addr)
+{
+ if ((addr >= (void *)PAGE_OFFSET) &&
+ (addr < high_memory))
+ return virt_to_head_page(addr);
+ return NULL;
+}
+
+static void describe_object_addr(struct kmem_cache *cache, void *object,
+ const void *addr)
+{
+ unsigned long access_addr = (unsigned long)addr;
+ unsigned long object_addr = (unsigned long)object;
+ const char *rel_type;
+ int rel_bytes;
+
+ pr_err("The buggy address belongs to the object at %p\n"
+ " which belongs to the cache %s of size %d\n",
+ object, cache->name, cache->object_size);
+
+ if (!addr)
+ return;
+
+ if (access_addr < object_addr) {
+ rel_type = "to the left";
+ rel_bytes = object_addr - access_addr;
+ } else if (access_addr >= object_addr + cache->object_size) {
+ rel_type = "to the right";
+ rel_bytes = access_addr - (object_addr + cache->object_size);
+ } else {
+ rel_type = "inside";
+ rel_bytes = access_addr - object_addr;
+ }
+
+ pr_err("The buggy address is located %d bytes %s of\n"
+ " %d-byte region [%p, %p)\n",
+ rel_bytes, rel_type, cache->object_size, (void *)object_addr,
+ (void *)(object_addr + cache->object_size));
+}
+
+static void describe_object(struct kmem_cache *cache, void *object,
+ const void *addr)
{
struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+ if (cache->flags & SLAB_KASAN) {
+ print_track(&alloc_info->alloc_track, "Allocated");
+ pr_err("\n");
+ print_track(&alloc_info->free_track, "Freed");
+ pr_err("\n");
+ }
+
+ describe_object_addr(cache, object, addr);
+}
+
+static void print_address_description(void *addr)
+{
+ struct page *page = addr_to_page(addr);
+
dump_stack();
- pr_err("Object at %p, in cache %s size: %d\n", object, cache->name,
- cache->object_size);
+ pr_err("\n");
- if (!(cache->flags & SLAB_KASAN))
- return;
+ if (page && PageSlab(page)) {
+ struct kmem_cache *cache = page->slab_cache;
+ void *object = nearest_obj(cache, page, addr);
- pr_err("Allocated:\n");
- print_track(&alloc_info->alloc_track);
- pr_err("Freed:\n");
- print_track(&alloc_info->free_track);
-}
+ describe_object(cache, object, addr);
+ }
-void kasan_report_double_free(struct kmem_cache *cache, void *object,
- s8 shadow)
-{
- unsigned long flags;
+ if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
+ pr_err("The buggy address belongs to the variable:\n");
+ pr_err(" %pS\n", addr);
+ }
- kasan_start_report(&flags);
- pr_err("BUG: Double free or freeing an invalid pointer\n");
- pr_err("Unexpected shadow byte: 0x%hhX\n", shadow);
- kasan_object_err(cache, object);
- kasan_end_report(&flags);
-}
-
-static void print_address_description(struct kasan_access_info *info)
-{
- const void *addr = info->access_addr;
-
- if ((addr >= (void *)PAGE_OFFSET) &&
- (addr < high_memory)) {
- struct page *page = virt_to_head_page(addr);
-
- if (PageSlab(page)) {
- void *object;
- struct kmem_cache *cache = page->slab_cache;
- object = nearest_obj(cache, page,
- (void *)info->access_addr);
- kasan_object_err(cache, object);
- return;
- }
+ if (page) {
+ pr_err("The buggy address belongs to the page:\n");
dump_page(page, "kasan: bad access detected");
}
-
- if (kernel_or_module_addr(addr)) {
- if (!init_task_stack_addr(addr))
- pr_err("Address belongs to variable %pS\n", addr);
- }
- dump_stack();
}
static bool row_is_guilty(const void *row, const void *guilty)
@@ -260,37 +322,74 @@
}
}
+void kasan_report_double_free(struct kmem_cache *cache, void *object,
+ void *ip)
+{
+ unsigned long flags;
+
+ kasan_start_report(&flags);
+ pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip);
+ pr_err("\n");
+ print_address_description(object);
+ pr_err("\n");
+ print_shadow_for_address(object);
+ kasan_end_report(&flags);
+}
+
static void kasan_report_error(struct kasan_access_info *info)
{
unsigned long flags;
- const char *bug_type;
kasan_start_report(&flags);
- if (info->access_addr <
- kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) {
- if ((unsigned long)info->access_addr < PAGE_SIZE)
- bug_type = "null-ptr-deref";
- else if ((unsigned long)info->access_addr < TASK_SIZE)
- bug_type = "user-memory-access";
- else
- bug_type = "wild-memory-access";
- pr_err("BUG: KASAN: %s on address %p\n",
- bug_type, info->access_addr);
- pr_err("%s of size %zu by task %s/%d\n",
- info->is_write ? "Write" : "Read",
- info->access_size, current->comm,
- task_pid_nr(current));
+ print_error_description(info);
+ pr_err("\n");
+
+ if (!addr_has_shadow(info)) {
dump_stack();
} else {
- print_error_description(info);
- print_address_description(info);
+ print_address_description((void *)info->access_addr);
+ pr_err("\n");
print_shadow_for_address(info->first_bad_addr);
}
kasan_end_report(&flags);
}
+static unsigned long kasan_flags;
+
+#define KASAN_BIT_REPORTED 0
+#define KASAN_BIT_MULTI_SHOT 1
+
+bool kasan_save_enable_multi_shot(void)
+{
+ return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot);
+
+void kasan_restore_multi_shot(bool enabled)
+{
+ if (!enabled)
+ clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
+
+static int __init kasan_set_multi_shot(char *str)
+{
+ set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+ return 1;
+}
+__setup("kasan_multi_shot", kasan_set_multi_shot);
+
+static inline bool kasan_report_enabled(void)
+{
+ if (current->kasan_depth)
+ return false;
+ if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+ return true;
+ return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
+}
+
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip)
{
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index d1380ed..20cf3be 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1442,6 +1442,8 @@
if (page_count(page) == 0)
continue;
scan_block(page, page + 1, NULL);
+ if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
+ cond_resched();
}
}
put_online_mems();
diff --git a/mm/madvise.c b/mm/madvise.c
index 088a5b22..bb58a44 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -228,15 +228,14 @@
{
struct file *file = vma->vm_file;
+ *prev = vma;
#ifdef CONFIG_SWAP
if (!file) {
- *prev = vma;
force_swapin_readahead(vma, start, end);
return 0;
}
if (shmem_mapping(file->f_mapping)) {
- *prev = vma;
force_shm_swapin_readahead(vma, start, end,
file->f_mapping);
return 0;
@@ -251,7 +250,6 @@
return 0;
}
- *prev = vma;
start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (end > vma->vm_end)
end = vma->vm_end;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2a800c4..5008815 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5531,7 +5531,7 @@
next = page->lru.next;
VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
if (!page->mem_cgroup)
continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ce7d416..5aa71a8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -535,6 +535,13 @@
*/
ClearPageActive(p);
ClearPageUnevictable(p);
+
+ /*
+ * Poisoned page might never drop its ref count to 0 so we have
+ * to uncharge it manually from its memcg.
+ */
+ mem_cgroup_uncharge(p);
+
/*
* drop the page count elevated by isolate_lru_page()
*/
diff --git a/mm/mmap.c b/mm/mmap.c
index 6f90f07..4b412e2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2249,7 +2249,8 @@
gap_addr = TASK_SIZE;
next = vma->vm_next;
- if (next && next->vm_start < gap_addr) {
+ if (next && next->vm_start < gap_addr &&
+ (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2333,7 +2334,8 @@
if (gap_addr > address)
return -ENOMEM;
prev = vma->vm_prev;
- if (prev && prev->vm_end > gap_addr) {
+ if (prev && prev->vm_end > gap_addr &&
+ (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(prev->vm_flags & VM_GROWSDOWN))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2547,9 +2549,11 @@
struct vm_area_struct *new;
int err;
- if (is_vm_hugetlb_page(vma) && (addr &
- ~(huge_page_mask(hstate_vma(vma)))))
- return -EINVAL;
+ if (vma->vm_ops && vma->vm_ops->split) {
+ err = vma->vm_ops->split(vma, addr);
+ if (err)
+ return err;
+ }
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d631d25..4a18415 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -524,7 +524,6 @@
*/
set_bit(MMF_UNSTABLE, &mm->flags);
- tlb_gather_mmu(&tlb, mm, 0, -1);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (is_vm_hugetlb_page(vma))
continue;
@@ -546,11 +545,13 @@
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
- if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
+ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+ tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
&details);
+ tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+ }
}
- tlb_finish_mmu(&tlb, 0, -1);
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4a04413..94018ea 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2592,30 +2592,23 @@
* Update NUMA hit/miss statistics
*
* Must be called with interrupts disabled.
- *
- * When __GFP_OTHER_NODE is set assume the node of the preferred
- * zone is the local node. This is useful for daemons who allocate
- * memory on behalf of other processes.
*/
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
gfp_t flags)
{
#ifdef CONFIG_NUMA
- int local_nid = numa_node_id();
enum zone_stat_item local_stat = NUMA_LOCAL;
- if (unlikely(flags & __GFP_OTHER_NODE)) {
+ if (z->node != numa_node_id())
local_stat = NUMA_OTHER;
- local_nid = preferred_zone->node;
- }
- if (z->node == local_nid) {
+ if (z->node == preferred_zone->node)
__inc_zone_state(z, NUMA_HIT);
- __inc_zone_state(z, local_stat);
- } else {
+ else {
__inc_zone_state(z, NUMA_MISS);
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
}
+ __inc_zone_state(z, local_stat);
#endif
}
@@ -2828,9 +2821,6 @@
if (!area->nr_free)
continue;
- if (alloc_harder)
- return true;
-
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
if (!list_empty(&area->free_list[mt]))
return true;
@@ -2842,6 +2832,9 @@
return true;
}
#endif
+ if (alloc_harder &&
+ !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ return true;
}
return false;
}
@@ -3429,12 +3422,6 @@
}
/*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
* Checks whether it makes sense to retry the reclaim to make a forward progress
* for the given allocation request.
* The reclaim feedback represented by did_some_progress (any progress during
@@ -4392,7 +4379,8 @@
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
node_page_state(pgdat, NR_PAGES_SCANNED),
- !pgdat_reclaimable(pgdat) ? "yes" : "no");
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+ "yes" : "no");
}
for_each_populated_zone(zone) {
@@ -7309,11 +7297,18 @@
/*
* In case of -EBUSY, we'd like to know which page causes problem.
- * So, just fall through. We will check it in test_pages_isolated().
+ * So, just fall through. test_pages_isolated() has a tracepoint
+ * which will report the busy page.
+ *
+ * It is possible that busy pages could become available before
+ * the call to test_pages_isolated, and the range will actually be
+ * allocated. So, if we fall through be sure to clear ret so that
+ * -EBUSY is not accidentally used or returned to caller.
*/
ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret && ret != -EBUSY)
goto done;
+ ret =0;
/*
* Pages from [start, end) are within a MAX_ORDER_NR_PAGES
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 622f6b6..0dc614d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -458,6 +458,9 @@
static int shutdown_cache(struct kmem_cache *s,
struct list_head *release, bool *need_rcu_barrier)
{
+ /* free asan quarantined objects */
+ kasan_cache_shutdown(s);
+
if (__kmem_cache_shutdown(s) != 0)
return -EBUSY;
@@ -741,7 +744,6 @@
get_online_cpus();
get_online_mems();
- kasan_cache_destroy(s);
mutex_lock(&slab_mutex);
s->refcount--;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 30a88b9..f118dc2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2606,6 +2606,15 @@
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+ /*
+ * Kswapd gives up on balancing particular nodes after too
+ * many failures to reclaim anything from them and goes to
+ * sleep. On reclaim progress, reset the failure counter. A
+ * successful direct reclaim run will revive a dormant kswapd.
+ */
+ if (reclaimable)
+ pgdat->kswapd_failures = 0;
+
return reclaimable;
}
@@ -2680,10 +2689,6 @@
GFP_KERNEL | __GFP_HARDWALL))
continue;
- if (sc->priority != DEF_PRIORITY &&
- !pgdat_reclaimable(zone->zone_pgdat))
- continue; /* Let kswapd poll it */
-
/*
* If we already have plenty of memory free for
* compaction in this zone, don't free any more.
@@ -2820,7 +2825,7 @@
return 0;
}
-static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat)
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -2828,6 +2833,9 @@
int i;
bool wmark_ok;
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
for (i = 0; i <= ZONE_NORMAL; i++) {
zone = &pgdat->node_zones[i];
if (!managed_zone(zone) ||
@@ -2908,7 +2916,7 @@
/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
- if (pfmemalloc_watermark_ok(pgdat))
+ if (allow_direct_reclaim(pgdat))
goto out;
break;
}
@@ -2930,14 +2938,14 @@
*/
if (!(gfp_mask & __GFP_FS)) {
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
- pfmemalloc_watermark_ok(pgdat), HZ);
+ allow_direct_reclaim(pgdat), HZ);
goto check_pending;
}
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
- pfmemalloc_watermark_ok(pgdat));
+ allow_direct_reclaim(pgdat));
check_pending:
if (fatal_signal_pending(current))
@@ -3116,7 +3124,7 @@
/*
* The throttled processes are normally woken up in balance_pgdat() as
- * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+ * soon as allow_direct_reclaim() is true. But there is a potential
* race between when kswapd checks the watermarks and a process gets
* throttled. There is also a potential race if processes get
* throttled, kswapd wakes, a large process exits thereby balancing the
@@ -3130,6 +3138,10 @@
if (waitqueue_active(&pgdat->pfmemalloc_wait))
wake_up_all(&pgdat->pfmemalloc_wait);
+ /* Hopeless node, leave it to direct reclaim */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
for (i = 0; i <= classzone_idx; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -3216,9 +3228,9 @@
count_vm_event(PAGEOUTRUN);
do {
+ unsigned long nr_reclaimed = sc.nr_reclaimed;
bool raise_priority = true;
- sc.nr_reclaimed = 0;
sc.reclaim_idx = classzone_idx;
/*
@@ -3297,7 +3309,7 @@
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
- pfmemalloc_watermark_ok(pgdat))
+ allow_direct_reclaim(pgdat))
wake_up_all(&pgdat->pfmemalloc_wait);
/* Check if kswapd should be suspending */
@@ -3308,10 +3320,14 @@
* Raise priority if scanning rate is too low or there was no
* progress in reclaiming pages
*/
- if (raise_priority || !sc.nr_reclaimed)
+ nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
+ if (raise_priority || !nr_reclaimed)
sc.priority--;
} while (sc.priority >= 1);
+ if (!sc.nr_reclaimed)
+ pgdat->kswapd_failures++;
+
out:
/*
* Return the order kswapd stopped reclaiming at as
@@ -3511,6 +3527,10 @@
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
+ /* Hopeless node, leave it to direct reclaim */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return;
+
/* Only wake kswapd if all zones are unbalanced */
for (z = 0; z <= classzone_idx; z++) {
zone = pgdat->node_zones + z;
@@ -3781,9 +3801,6 @@
sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
return NODE_RECLAIM_FULL;
- if (!pgdat_reclaimable(pgdat))
- return NODE_RECLAIM_FULL;
-
/*
* Do not scan if the allocation should not be delayed.
*/
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 604f26a..3863b5d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -932,6 +932,7 @@
"nr_slab_unreclaimable",
"nr_page_table_pages",
"nr_kernel_stack",
+ "nr_overhead",
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
@@ -1420,7 +1421,7 @@
"\n node_unreclaimable: %u"
"\n start_pfn: %lu"
"\n node_inactive_ratio: %u",
- !pgdat_reclaimable(zone->zone_pgdat),
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
zone->zone_start_pfn,
zone->zone_pgdat->inactive_ratio);
seq_putc(m, '\n');
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 1689bb5..d3548c4 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1407,7 +1407,7 @@
* pools/users, we can't allow mapping in interrupt context
* because it can corrupt another users mappings.
*/
- WARN_ON_ONCE(in_interrupt());
+ BUG_ON(in_interrupt());
/* From now on, migration cannot move the object */
pin_tag(handle);
diff --git a/mm/zswap.c b/mm/zswap.c
index dbef278..ded051e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -752,18 +752,22 @@
pool = zswap_pool_find_get(type, compressor);
if (pool) {
zswap_pool_debug("using existing", pool);
+ WARN_ON(pool == zswap_pool_current());
list_del_rcu(&pool->list);
- } else {
- spin_unlock(&zswap_pools_lock);
- pool = zswap_pool_create(type, compressor);
- spin_lock(&zswap_pools_lock);
}
+ spin_unlock(&zswap_pools_lock);
+
+ if (!pool)
+ pool = zswap_pool_create(type, compressor);
+
if (pool)
ret = param_set_charp(s, kp);
else
ret = -EINVAL;
+ spin_lock(&zswap_pools_lock);
+
if (!ret) {
put_pool = zswap_pool_current();
list_add_rcu(&pool->list, &zswap_pools);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4a47074..c8ea3cf 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -111,12 +111,7 @@
vlan_gvrp_uninit_applicant(real_dev);
}
- /* Take it out of our own structures, but be sure to interlock with
- * HW accelerating devices or SW vlan input packet processing if
- * VLAN is not 0 (leave it there for 802.1p).
- */
- if (vlan_id)
- vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+ vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ffd09c1..2bbca23 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3353,9 +3353,10 @@
break;
case L2CAP_CONF_EFS:
- remote_efs = 1;
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
+ remote_efs = 1;
memcpy(&efs, (void *) val, olen);
+ }
break;
case L2CAP_CONF_EWS:
@@ -3574,16 +3575,17 @@
break;
case L2CAP_CONF_EFS:
- if (olen == sizeof(efs))
+ if (olen == sizeof(efs)) {
memcpy(&efs, (void *)val, olen);
- if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != L2CAP_SERV_NOTRAFIC &&
- efs.stype != chan->local_stype)
- return -ECONNREFUSED;
+ if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != L2CAP_SERV_NOTRAFIC &&
+ efs.stype != chan->local_stype)
+ return -ECONNREFUSED;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs, endptr - ptr);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+ (unsigned long) &efs, endptr - ptr);
+ }
break;
case L2CAP_CONF_FCS:
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index aa1df1a..82ce571 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -706,18 +706,20 @@
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge;
- unsigned int mtu_reserved;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
+ mtu = skb->dev->mtu;
- if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+ if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+ mtu = nf_bridge->frag_max_size;
+
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
- nf_bridge = nf_bridge_info_get(skb);
-
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5d4006e..4f83122 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1092,19 +1092,20 @@
struct net_bridge *br = netdev_priv(dev);
int err;
+ err = register_netdevice(dev);
+ if (err)
+ return err;
+
if (tb[IFLA_ADDRESS]) {
spin_lock_bh(&br->lock);
br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
spin_unlock_bh(&br->lock);
}
- err = register_netdevice(dev);
- if (err)
- return err;
-
err = br_changelink(dev, tb, data);
if (err)
- unregister_netdevice(dev);
+ br_dev_delete(dev, NULL);
+
return err;
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5488e4a..ac1552d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -722,13 +722,12 @@
if (unlikely(!net_eq(dev_net(dev), &init_net)))
goto drop;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN,
- "PF_CAN: dropped non conform CAN skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+ cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
goto drop;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
@@ -746,13 +745,12 @@
if (unlikely(!net_eq(dev_net(dev), &init_net)))
goto drop;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+ cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
goto drop;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
diff --git a/net/core/dev.c b/net/core/dev.c
index c378918..67b5d4d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1304,6 +1304,7 @@
{
rtnl_lock();
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
@@ -3082,10 +3083,21 @@
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e9989b8..7913771 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -742,15 +742,6 @@
return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
- char name[sizeof(current->comm)];
-
- pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
- get_task_comm(name, current), details);
-}
-
/* Query device for its ethtool_cmd settings.
*
* Backward compatibility note: for compatibility with legacy ethtool,
@@ -777,10 +768,8 @@
&link_ksettings);
if (err < 0)
return err;
- if (!convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings))
- warn_incomplete_ethtool_legacy_settings_conversion(
- "link modes are only partially reported");
+ convert_link_ksettings_to_legacy_settings(&cmd,
+ &link_ksettings);
/* send a sensible cmd tag back to user */
cmd.cmd = ETHTOOL_GSET;
diff --git a/net/core/filter.c b/net/core/filter.c
index 5e42e0e..c385c55 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -451,6 +451,10 @@
convert_bpf_extensions(fp, &insn))
break;
+ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -1015,11 +1019,9 @@
*/
goto out_err_free;
- /* We are guaranteed to never error here with cBPF to eBPF
- * transitions, since there's no issue with type compatibility
- * checks on program arrays.
- */
fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err_free;
kfree(old_prog);
return fp;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 32e4e01..862d63e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -550,8 +550,8 @@
out_good:
ret = true;
- key_control->thoff = (u16)nhoff;
out:
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
@@ -559,7 +559,6 @@
out_bad:
ret = false;
- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f45f619..7b31566 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -496,7 +496,7 @@
if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
@@ -508,7 +508,7 @@
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
rc = n1;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7001da9..b7efe2f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -263,7 +263,7 @@
spin_lock_irqsave(&net->nsid_lock, flags);
peer = idr_find(&net->netns_ids, id);
if (peer)
- get_net(peer);
+ peer = maybe_get_net(peer);
spin_unlock_irqrestore(&net->nsid_lock, flags);
rcu_read_unlock();
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aec5605..a645155 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3823,7 +3823,7 @@
struct sock *sk = skb->sk;
if (!skb_may_tx_timestamp(sk, false))
- return;
+ goto err;
/* Take a reference to prevent skb_orphan() from freeing the socket,
* but only if the socket refcount is not zero.
@@ -3832,7 +3832,11 @@
*skb_hwtstamps(skb) = *hwtstamps;
__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
sock_put(sk);
+ return;
}
+
+err:
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index acd2a6c..fb467db 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -295,7 +295,7 @@
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_SOCK_DIAG, AF_INET);
+ NETLINK_SOCK_DIAG, AF_INET6);
break;
}
return 0;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 77f396b..5dce429 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -93,6 +93,16 @@
return more_reuse;
}
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
/**
* reuseport_add_sock - Add a socket to the reuseport group of another.
* @sk: New socket to add to the group.
@@ -101,7 +111,7 @@
*/
int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
- struct sock_reuseport *reuse;
+ struct sock_reuseport *old_reuse, *reuse;
if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
int err = reuseport_alloc(sk2);
@@ -112,10 +122,13 @@
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "socket already in reuseport group");
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
if (reuse->num_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
@@ -133,19 +146,11 @@
spin_unlock_bh(&reuseport_lock);
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
-static void reuseport_free_rcu(struct rcu_head *head)
-{
- struct sock_reuseport *reuse;
-
- reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
- kfree(reuse);
-}
-
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa6..1b46190 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -292,7 +292,13 @@
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
.proc_handler = proc_dointvec
+#else
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &one,
+#endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
@@ -369,14 +375,16 @@
.data = &sysctl_net_busy_poll,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "busy_read",
.data = &sysctl_net_busy_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
#endif
#ifdef CONFIG_NET_SCHED
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5e3a730..7753681 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,6 +140,9 @@
ccid2_pr_debug("RTO_EXPIRE\n");
+ if (sk->sk_state == DCCP_CLOSED)
+ goto out;
+
/* back-off timer */
hc->tx_rto <<= 1;
if (hc->tx_rto > DCCP_RTO_MAX)
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 39e7e2b..62522b8 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -57,10 +57,16 @@
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
__inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
inet_twsk_put(tw);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168f..9d43c1f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -259,6 +259,7 @@
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
int err = 0;
const int old_state = sk->sk_state;
@@ -278,6 +279,10 @@
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = NULL;
+ dp->dccps_hc_tx_ccid = NULL;
__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_write_queue);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 51b27ae..e60517e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32 *)neigh->primary_key;
+ __be32 addr;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
+ u32 inaddr_any = INADDR_ANY;
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+ addr = *(__be32 *)neigh->primary_key;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 062a67c..f08f984 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1380,7 +1380,7 @@
static bool inetdev_valid_mtu(unsigned int mtu)
{
- return mtu >= 68;
+ return mtu >= IPV4_MIN_MTU;
}
static void inetdev_send_gratuitous_arp(struct net_device *dev,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2ec005c..08b7260 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1254,14 +1254,19 @@
static void ip_fib_net_exit(struct net *net)
{
- unsigned int i;
+ int i;
rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
#endif
- for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+ /* Destroy the tables in reverse order to guarantee that the
+ * local table, ID 255, is destroyed before the main table, ID
+ * 254. This is necessary as the local table may contain
+ * references to data contained in the main table.
+ */
+ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
struct fib_table *tb;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 691146a..42a19fb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -768,7 +768,7 @@
}
/*
- * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
*/
@@ -796,7 +796,8 @@
if (iph->ihl < 5) /* Mangled header, drop. */
goto out_err;
- if (icmph->type == ICMP_DEST_UNREACH) {
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
switch (icmph->code & 15) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
@@ -832,8 +833,16 @@
}
if (icmph->code > NR_ICMP_UNREACH)
goto out;
- } else if (icmph->type == ICMP_PARAMETERPROB)
+ break;
+ case ICMP_PARAMETERPROB:
info = ntohl(icmph->un.gateway) >> 24;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
+ if (icmph->code == ICMP_EXC_FRAGTIME)
+ goto out;
+ break;
+ }
/*
* Throw it at our lower layers
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 08575e3..7f5fe07 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -89,6 +89,7 @@
#include <linux/rtnetlink.h>
#include <linux/times.h>
#include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -321,6 +322,23 @@
return scount;
}
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+ if (!in_dev)
+ return htonl(INADDR_ANY);
+
+ for_ifa(in_dev) {
+ if (fl4->saddr == ifa->ifa_local)
+ return fl4->saddr;
+ } endfor_ifa(in_dev);
+
+ return htonl(INADDR_ANY);
+}
+
static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
@@ -368,7 +386,11 @@
pip->frag_off = htons(IP_DF);
pip->ttl = 1;
pip->daddr = fl4.daddr;
- pip->saddr = fl4.saddr;
+
+ rcu_read_lock();
+ pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
+ rcu_read_unlock();
+
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
ip_select_ident(net, skb, NULL);
@@ -404,16 +426,17 @@
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
- int type, struct igmpv3_grec **ppgr)
+ int type, struct igmpv3_grec **ppgr, unsigned int mtu)
{
struct net_device *dev = pmc->interface->dev;
struct igmpv3_report *pih;
struct igmpv3_grec *pgr;
- if (!skb)
- skb = igmpv3_newpack(dev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = igmpv3_newpack(dev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -436,12 +459,17 @@
struct igmpv3_grec *pgr = NULL;
struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV4_MIN_MTU)
+ return skb;
+
isquery = type == IGMPV3_MODE_IS_INCLUDE ||
type == IGMPV3_MODE_IS_EXCLUDE;
truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +490,7 @@
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
}
}
first = 1;
@@ -498,12 +526,12 @@
pgr->grec_nsrcs = htons(scount);
if (skb)
igmpv3_sendpack(skb);
- skb = igmpv3_newpack(dev, dev->mtu);
+ skb = igmpv3_newpack(dev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -538,7 +566,7 @@
igmpv3_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 453db95..4bf3b8a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -198,6 +198,7 @@
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
+ rcu_read_lock();
spin_lock(&qp->q.lock);
if (qp->q.flags & INET_FRAG_COMPLETE)
@@ -207,7 +208,7 @@
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *head = qp->q.fragments;
+ struct sk_buff *clone, *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@@ -216,32 +217,40 @@
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
- rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
- goto out_rcu_unlock;
+ goto out;
+
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
if (err)
- goto out_rcu_unlock;
+ goto out;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (frag_expire_skip_icmp(qp->user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out_rcu_unlock;
+ goto out;
+
+ clone = skb_clone(head, GFP_ATOMIC);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ if (clone) {
+ spin_unlock(&qp->q.lock);
+ icmp_send(clone, ICMP_TIME_EXCEEDED,
+ ICMP_EXC_FRAGTIME, 0);
+ consume_skb(clone);
+ goto out_rcu_unlock;
+ }
}
out:
spin_unlock(&qp->q.lock);
+out_rcu_unlock:
+ rcu_read_unlock();
ipq_put(qp);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd7f183..96536a0 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -346,8 +346,8 @@
dev->needed_headroom = t_hlen + hlen;
mtu -= (dev->hard_header_len + t_hlen);
- if (mtu < 68)
- mtu = 68;
+ if (mtu < IPV4_MIN_MTU)
+ mtu = IPV4_MIN_MTU;
return mtu;
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 071a785..b23464d 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -306,7 +306,7 @@
while ((d = next)) {
next = d->next;
dev = d->dev;
- if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
+ if (d != ic_dev && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 713c09a..0c9ded2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -158,6 +158,10 @@
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
+
+ if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+ return NF_ACCEPT;
+
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f8aad03..6f5e8d0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,11 +255,6 @@
/* maniptype == SRC for postrouting. */
enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
- /* We never see fragments: conntrack defrags on pre-routing
- * and local-out, and nf_nat_out protects post-routing.
- */
- NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
* have dropped it. Hence it's the user's responsibilty to
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5a8f7c3..53e49f5 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@
.timeout = 180,
};
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help = help,
@@ -1288,17 +1278,10 @@
static int __init nf_nat_snmp_basic_init(void)
{
- int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
- ret = nf_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- nf_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
+ return nf_conntrack_helper_register(&snmp_trap_helper);
}
static void __exit nf_nat_snmp_basic_fini(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ab0bbcb..a860df2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -502,11 +502,16 @@
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
+ int hdrincl;
err = -EMSGSIZE;
if (len > 0xFFFF)
goto out;
+ /* hdrincl should be READ_ONCE(inet->hdrincl)
+ * but READ_ONCE() doesn't work with bit fields
+ */
+ hdrincl = inet->hdrincl;
/*
* Check the flags.
*/
@@ -582,7 +587,7 @@
/* Linux does not mangle headers on raw sockets,
* so that IP options + IP_HDRINCL is non-sense.
*/
- if (inet->hdrincl)
+ if (hdrincl)
goto done;
if (ipc.opt->opt.srr) {
if (!daddr)
@@ -604,12 +609,12 @@
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
- if (!inet->hdrincl) {
+ if (!hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -634,7 +639,7 @@
goto do_confirm;
back_from_confirm:
- if (inet->hdrincl)
+ if (hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
&rt, msg->msg_flags, &ipc.sockc);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b21bf59..1a6b2e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -633,9 +633,12 @@
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
struct rtable *rt;
+ u32 genid, hval;
unsigned int i;
int depth;
- u32 hval = fnhe_hashfun(daddr);
+
+ genid = fnhe_genid(dev_net(nh->nh_dev));
+ hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
@@ -658,12 +661,13 @@
}
if (fnhe) {
+ if (fnhe->fnhe_genid != genid)
+ fnhe->fnhe_genid = genid;
if (gw)
fnhe->fnhe_gw = gw;
- if (pmtu) {
+ if (pmtu)
fnhe->fnhe_pmtu = pmtu;
- fnhe->fnhe_expires = max(1UL, expires);
- }
+ fnhe->fnhe_expires = max(1UL, expires);
/* Update all cached dsts too */
rt = rcu_dereference(fnhe->fnhe_rth_input);
if (rt)
@@ -682,7 +686,7 @@
fnhe->fnhe_next = hash->chain;
rcu_assign_pointer(hash->chain, fnhe);
}
- fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+ fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
fnhe->fnhe_pmtu = pmtu;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9b2d1b8..7ea8027 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -538,7 +538,7 @@
if (tp->urg_data & TCP_URG_VALID)
mask |= POLLPRI;
- } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* Active TCP fastopen socket with defer_connect
* Return POLLOUT so application can call write()
* in order for kernel to generate SYN+data
@@ -1115,9 +1115,14 @@
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, flags);
- inet->defer_connect = 0;
- *copied = tp->fastopen_req->copied;
- tcp_free_fastopen_req(tp);
+ /* fastopen_req could already be freed in __inet_stream_connect
+ * if the connection times out or gets rst
+ */
+ if (tp->fastopen_req) {
+ *copied = tp->fastopen_req->copied;
+ tcp_free_fastopen_req(tp);
+ inet->defer_connect = 0;
+ }
return err;
}
@@ -2232,6 +2237,9 @@
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
+ } else if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_set_state(sk, TCP_CLOSE);
}
}
@@ -2314,6 +2322,7 @@
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
@@ -2327,8 +2336,18 @@
sk->sk_rx_dst = NULL;
tcp_saved_syn_free(tp);
+ /* Clean up fastopen related fields */
+ tcp_free_fastopen_req(tp);
+ inet->defer_connect = 0;
+
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ sk->sk_frag.offset = 0;
+ }
+
sk->sk_error_report(sk);
return err;
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index cb8db34..8ec6053 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -81,7 +81,8 @@
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
- full_bw_cnt:3, /* number of rounds without large bw gains */
+ full_bw_reached:1, /* reached full bw in Startup? */
+ full_bw_cnt:2, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
@@ -151,7 +152,7 @@
{
const struct bbr *bbr = inet_csk_ca(sk);
- return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+ return bbr->full_bw_reached;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
@@ -451,7 +452,8 @@
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
- bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
+ bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
+ bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -460,8 +462,7 @@
{
struct bbr *bbr = inet_csk_ca(sk);
- if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
- bbr_is_next_cycle_phase(sk, rs))
+ if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
bbr_advance_cycle_phase(sk);
}
@@ -688,6 +689,7 @@
return;
}
++bbr->full_bw_cnt;
+ bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
@@ -821,6 +823,7 @@
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
+ bbr->full_bw_reached = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp.v64 = 0;
@@ -840,6 +843,11 @@
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
+ bbr->full_bw_cnt = 0;
+ bbr_reset_lt_bw_sampling(sk);
return tcp_sk(sk)->snd_cwnd;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 649d611..759d83e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1967,6 +1967,8 @@
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
tp->fackets_out = 0;
+ /* Mark SACK reneging until we recover from this loss event. */
+ tp->is_sack_reneg = 1;
}
tcp_clear_all_retrans_hints(tp);
@@ -2464,6 +2466,7 @@
return true;
}
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
return false;
}
@@ -2495,8 +2498,10 @@
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
- if (frto_undo || tcp_is_sack(tp))
+ if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
+ tp->is_sack_reneg = 0;
+ }
return true;
}
return false;
@@ -3590,6 +3595,7 @@
struct tcp_sacktag_state sack_state;
struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
+ bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
@@ -3712,7 +3718,7 @@
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, &now, &rs);
+ tcp_rate_gen(sk, delivered, lost, is_sack_reneg, &now, &rs);
tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_xmit_recovery(sk, rexmit);
return 1;
@@ -5082,7 +5088,7 @@
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
/* pairs with tcp_poll() */
- smp_mb__after_atomic();
+ smp_mb();
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4b8cfda..710d1af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -836,7 +836,7 @@
tcp_time_stamp,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+ tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
@@ -1678,7 +1678,9 @@
*/
sock_hold(sk);
refcounted = true;
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = NULL;
+ if (!tcp_filter(sk, skb))
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 64e1ba4..830a564 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -328,10 +328,16 @@
timeo = TCP_TIMEWAIT_LEN;
}
+ /* tw_timer is pinned, so we need to make sure BH are disabled
+ * in following section, otherwise timer handler could run before
+ * we complete the initialization.
+ */
+ local_bh_disable();
inet_twsk_schedule(tw, timeo);
/* Linkage updates. */
__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
inet_twsk_put(tw);
+ local_bh_enable();
} else {
/* Sorry, if we're out of memory, just CLOSE this
* socket up. We've got bigger problems than
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index bc68da3..366b1be 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 9be1581..18309f5 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -106,7 +106,7 @@
/* Update the connection delivery information and generate a rate sample. */
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
- struct skb_mstamp *now, struct rate_sample *rs)
+ bool is_sack_reneg, struct skb_mstamp *now, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 snd_us, ack_us;
@@ -124,8 +124,12 @@
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
- /* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp.v64) {
+ /* Return an invalid sample if no timing information is available or
+ * in recovery from loss with SACK reneging. Rate samples taken during
+ * a SACK reneging event may overestimate bw by including packets that
+ * were SACKed before the reneg.
+ */
+ if (!rs->prior_mstamp.v64 || is_sack_reneg) {
rs->delivered = -1;
rs->interval_us = -1;
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74db43b..6952338 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -50,11 +50,19 @@
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
+ * Also close if our net namespace is exiting; in that case there is no
+ * hope of ever communicating again since all netns interfaces are already
+ * down (or about to be down), and we need to release our dst references,
+ * which have been moved to the netns loopback interface, so the namespace
+ * can finish exiting. This condition is only possible if we are a kernel
+ * socket, as those do not hold references to the namespace.
+ *
* Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
* 2. If we have strong memory pressure.
+ * 3. If our net namespace is exiting.
*/
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
@@ -83,6 +91,13 @@
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
+
+ if (!check_net(sock_net(sk))) {
+ /* Not possible to send reset; just close */
+ tcp_done(sk);
+ return 1;
+ }
+
return 0;
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 4c4bac1..3ecb61e 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6401574..f4f616e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -205,6 +205,9 @@
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 497c79e..b74fd8c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -290,10 +290,10 @@
.keep_addr_on_down = 0,
};
-/* Check if a valid qdisc is available */
-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+/* Check if link is ready: is it up and is a valid qdisc available */
+static inline bool addrconf_link_ready(const struct net_device *dev)
{
- return !qdisc_tx_is_noop(dev);
+ return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
}
static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -438,7 +438,7 @@
ndev->token = in6addr_any;
- if (netif_running(dev) && addrconf_qdisc_ok(dev))
+ if (netif_running(dev) && addrconf_link_ready(dev))
ndev->if_flags |= IF_READY;
ipv6_mc_init_dev(ndev);
@@ -3397,7 +3397,7 @@
/* restore routes for permanent addresses */
addrconf_permanent_addr(dev);
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
dev->name);
@@ -3412,7 +3412,7 @@
run_pending = 1;
}
} else if (event == NETDEV_CHANGE) {
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
break;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0281645..1111684 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -225,7 +225,6 @@
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -291,6 +290,7 @@
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -395,19 +395,21 @@
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
@@ -926,12 +928,12 @@
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
- err = icmpv6_init();
- if (err)
- goto icmp_fail;
err = ip6_mr_init();
if (err)
goto ipmr_fail;
+ err = icmpv6_init();
+ if (err)
+ goto icmp_fail;
err = ndisc_init();
if (err)
goto ndisc_fail;
@@ -1061,10 +1063,10 @@
ndisc_cleanup();
ndisc_fail:
ip6_mr_cleanup();
-ipmr_fail:
- icmpv6_cleanup();
icmp_fail:
unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+ icmpv6_cleanup();
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 65a58fe..21bd54f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -337,11 +337,12 @@
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
goto failed_free;
+ ip6gre_tnl_link_config(nt, 1);
+
/* Can use a lockless transmit, unless we generate output sequences */
if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
@@ -461,7 +462,7 @@
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
return PACKET_RCVD;
}
@@ -1267,7 +1268,6 @@
static int ip6gre_tap_init(struct net_device *dev)
{
- struct ip6_tnl *tunnel;
int ret;
ret = ip6gre_tunnel_init_common(dev);
@@ -1276,10 +1276,6 @@
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
-
- ip6gre_tnl_link_config(tunnel, 1);
-
return 0;
}
@@ -1374,7 +1370,6 @@
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
@@ -1400,6 +1395,11 @@
if (err)
goto out;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ if (tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4e6c439..55ca65c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -165,6 +165,14 @@
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+ if (!np->autoflowlabel_set)
+ return ip6_default_np_autolabel(net);
+ else
+ return np->autoflowlabel;
+}
+
/*
* xmit an sk_buff (used by TCP, SCTP and DCCP)
* Note : socket lock is not held for SYNACK packets, but might be modified
@@ -228,7 +236,7 @@
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -1260,14 +1268,16 @@
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(&rt->dst);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < IPV6_MIN_MTU)
+ return -EINVAL;
cork->base.fragsize = mtu;
if (dst_allfrag(rt->dst.path))
cork->base.flags |= IPCORK_ALLFRAG;
@@ -1699,7 +1709,7 @@
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel, fl6));
+ ip6_autoflowlabel(net, np), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
@@ -1798,11 +1808,13 @@
cork.base.flags = 0;
cork.base.addr = 0;
cork.base.opt = NULL;
+ cork.base.dst = NULL;
v6_cork.opt = NULL;
err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
- if (err)
+ if (err) {
+ ip6_cork_release(&cork, &v6_cork);
return ERR_PTR(err);
-
+ }
if (ipc6->dontfrag < 0)
ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 64aefc2..4c52236 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -911,7 +911,7 @@
if (t->parms.collect_md) {
tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
if (!tun_dst)
- return 0;
+ goto drop;
}
ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
log_ecn_error);
@@ -1080,10 +1080,11 @@
memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
neigh_release(neigh);
}
- } else if (!(t->parms.flags &
- (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
- /* enable the cache only only if the routing decision does
- * not depend on the current inner header value
+ } else if (t->parms.proto != 0 && !(t->parms.flags &
+ (IP6_TNL_F_USE_ORIG_TCLASS |
+ IP6_TNL_F_USE_ORIG_FWMARK))) {
+ /* enable the cache only if neither the outer protocol nor the
+ * routing decision depends on the current inner header value
*/
use_cache = true;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index da64b20..afc30a0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -189,12 +189,12 @@
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
int err;
+ dev->rtnl_link_ops = &vti6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &vti6_link_ops;
dev_hold(dev);
vti6_tnl_link(ip6n, t);
@@ -485,11 +485,15 @@
if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- else
+ } else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ }
return -EMSGSIZE;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 117405d..a30e7e9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -495,6 +495,7 @@
return ERR_PTR(-ENOENT);
it->mrt = mrt;
+ it->cache = NULL;
return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 38bee17..bcea985 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -874,6 +874,7 @@
break;
case IPV6_AUTOFLOWLABEL:
np->autoflowlabel = valbool;
+ np->autoflowlabel_set = 1;
retv = 0;
break;
}
@@ -1315,7 +1316,7 @@
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
default:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1bdc703..ca8fac6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1682,16 +1682,16 @@
}
static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
- int type, struct mld2_grec **ppgr)
+ int type, struct mld2_grec **ppgr, unsigned int mtu)
{
- struct net_device *dev = pmc->idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr;
- if (!skb)
- skb = mld_newpack(pmc->idev, dev->mtu);
- if (!skb)
- return NULL;
+ if (!skb) {
+ skb = mld_newpack(pmc->idev, mtu);
+ if (!skb)
+ return NULL;
+ }
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
pgr->grec_type = type;
pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
int scount, stotal, first, isquery, truncate;
+ unsigned int mtu;
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
+ mtu = READ_ONCE(dev->mtu);
+ if (mtu < IPV6_MIN_MTU)
+ return skb;
+
isquery = type == MLD2_MODE_IS_INCLUDE ||
type == MLD2_MODE_IS_EXCLUDE;
truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
}
}
first = 1;
@@ -1774,12 +1779,12 @@
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(idev, dev->mtu);
+ skb = mld_newpack(idev, mtu);
first = 1;
scount = 0;
}
if (first) {
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
first = 0;
}
if (!skb)
@@ -1814,7 +1819,7 @@
mld_sendpack(skb);
skb = NULL; /* add_grhead will get a new one */
}
- skb = add_grhead(skb, pmc, type, &pgr);
+ skb = add_grhead(skb, pmc, type, &pgr, mtu);
}
}
if (pgr)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 40d7405..db6d437 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1085,6 +1085,7 @@
ipip6_tunnel_link(sitn, t);
t->parms.iph.ttl = p->iph.ttl;
t->parms.iph.tos = p->iph.tos;
+ t->parms.iph.frag_off = p->iph.frag_off;
if (t->parms.link != p->link) {
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d372342..82b7a8e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -968,7 +968,7 @@
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
0, 0);
}
@@ -1442,7 +1442,9 @@
}
sock_hold(sk);
refcounted = true;
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = NULL;
+ if (!tcp_filter(sk, skb))
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
goto discard_and_relse;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c92..278e49c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7d378c..2bd2087 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -55,6 +55,9 @@
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 7eb0e8f..22785dc 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1624,60 +1624,35 @@
};
/* Clone a kcm socket. */
-static int kcm_clone(struct socket *osock, struct kcm_clone *info,
- struct socket **newsockp)
+static struct file *kcm_clone(struct socket *osock)
{
struct socket *newsock;
struct sock *newsk;
- struct file *newfile;
- int err, newfd;
+ struct file *file;
- err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out;
+ return ERR_PTR(-ENFILE);
newsock->type = osock->type;
newsock->ops = osock->ops;
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(0);
- if (unlikely(newfd < 0)) {
- err = newfd;
- goto out_fd_fail;
- }
-
- newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (unlikely(IS_ERR(newfile))) {
- err = PTR_ERR(newfile);
- goto out_sock_alloc_fail;
- }
-
newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
&kcm_proto, true);
if (!newsk) {
- err = -ENOMEM;
- goto out_sk_alloc_fail;
+ sock_release(newsock);
+ return ERR_PTR(-ENOMEM);
}
-
sock_init_data(newsock, newsk);
init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
- fd_install(newfd, newfile);
- *newsockp = newsock;
- info->fd = newfd;
+ file = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
+ if (IS_ERR(file))
+ sock_release(newsock);
- return 0;
-
-out_sk_alloc_fail:
- fput(newfile);
-out_sock_alloc_fail:
- put_unused_fd(newfd);
-out_fd_fail:
- sock_release(newsock);
-out:
- return err;
+ return file;
}
static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1707,21 +1682,25 @@
}
case SIOCKCMCLONE: {
struct kcm_clone info;
- struct socket *newsock = NULL;
+ struct file *file;
- if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- return -EFAULT;
+ info.fd = get_unused_fd_flags(0);
+ if (unlikely(info.fd < 0))
+ return info.fd;
- err = kcm_clone(sock, &info, &newsock);
-
- if (!err) {
- if (copy_to_user((void __user *)arg, &info,
- sizeof(info))) {
- err = -EFAULT;
- sys_close(info.fd);
- }
+ file = kcm_clone(sock);
+ if (IS_ERR(file)) {
+ put_unused_fd(info.fd);
+ return PTR_ERR(file);
}
-
+ if (copy_to_user((void __user *)arg, &info,
+ sizeof(info))) {
+ put_unused_fd(info.fd);
+ fput(file);
+ return -EFAULT;
+ }
+ fd_install(info.fd, file);
+ err = 0;
break;
}
default:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 94bf810..6482b00 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@
#endif
int len;
+ if (sp->sadb_address_len <
+ DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+ sizeof(uint64_t)))
+ return -EINVAL;
+
switch (addr->sa_family) {
case AF_INET:
len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
@@ -511,6 +516,9 @@
uint16_t ext_type;
int ext_len;
+ if (len < sizeof(*ehdr))
+ return -EINVAL;
+
ext_len = ehdr->sadb_ext_len;
ext_len *= sizeof(uint64_t);
ext_type = ehdr->sadb_ext_type;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b06acd0..cfc4dd8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1944,7 +1944,7 @@
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- (void)l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3468d56..9d77a54 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -48,7 +48,8 @@
return (struct l2tp_ip_sock *)sk;
}
-static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
+ __be32 raddr, int dif, u32 tunnel_id)
{
struct sock *sk;
@@ -62,6 +63,7 @@
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+ (!inet->inet_daddr || !raddr || inet->inet_daddr == raddr) &&
(!sk->sk_bound_dev_if || !dif ||
sk->sk_bound_dev_if == dif))
goto found;
@@ -72,15 +74,6 @@
return sk;
}
-static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
-{
- struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
- if (sk)
- sock_hold(sk);
-
- return sk;
-}
-
/* When processing receive frames, there are two cases to
* consider. Data frames consist of a non-zero session-id and an
* optional cookie. Control frames consist of a regular L2TP header
@@ -186,8 +179,8 @@
struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
- sk = __l2tp_ip_bind_lookup(net, iph->daddr, inet_iif(skb),
- tunnel_id);
+ sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
+ inet_iif(skb), tunnel_id);
if (!sk) {
read_unlock_bh(&l2tp_ip_lock);
goto discard;
@@ -289,7 +282,7 @@
inet->inet_saddr = 0; /* Use device */
write_lock_bh(&l2tp_ip_lock);
- if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr,
+ if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
write_unlock_bh(&l2tp_ip_lock);
ret = -EADDRINUSE;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 96efe47..86ad51a 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -59,12 +59,14 @@
static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
struct in6_addr *laddr,
+ const struct in6_addr *raddr,
int dif, u32 tunnel_id)
{
struct sock *sk;
sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
+ const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
if (l2tp == NULL)
@@ -73,6 +75,7 @@
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
(!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
+ (!raddr || ipv6_addr_any(sk_raddr) || ipv6_addr_equal(sk_raddr, raddr)) &&
(!sk->sk_bound_dev_if || !dif ||
sk->sk_bound_dev_if == dif))
goto found;
@@ -83,17 +86,6 @@
return sk;
}
-static inline struct sock *l2tp_ip6_bind_lookup(struct net *net,
- struct in6_addr *laddr,
- int dif, u32 tunnel_id)
-{
- struct sock *sk = __l2tp_ip6_bind_lookup(net, laddr, dif, tunnel_id);
- if (sk)
- sock_hold(sk);
-
- return sk;
-}
-
/* When processing receive frames, there are two cases to
* consider. Data frames consist of a non-zero session-id and an
* optional cookie. Control frames consist of a regular L2TP header
@@ -200,8 +192,8 @@
struct ipv6hdr *iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
- sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, inet6_iif(skb),
- tunnel_id);
+ sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+ inet6_iif(skb), tunnel_id);
if (!sk) {
read_unlock_bh(&l2tp_ip6_lock);
goto discard;
@@ -339,7 +331,7 @@
rcu_read_unlock();
write_lock_bh(&l2tp_ip6_lock);
- if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, bound_dev_if,
+ if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
addr->l2tp_conn_id)) {
write_unlock_bh(&l2tp_ip6_lock);
err = -EADDRINUSE;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 1ccd310..ee03bc8 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -287,7 +287,7 @@
l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_DELETE);
- (void) l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
out:
return ret;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e75cbf6..a0d901d 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,9 +231,6 @@
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
- if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
-
max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
}
rcu_read_unlock();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5c67a69..b4b3fe0 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -279,8 +279,6 @@
/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
*pos |= ifmsh->ps_peers_deep_sleep ?
IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
- *pos++ = 0x00;
-
return 0;
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index b747c96..fed598a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -788,7 +788,7 @@
struct mesh_path *mpath;
u8 ttl, flags, hopcount;
const u8 *orig_addr;
- u32 orig_sn, metric, metric_txsta, interval;
+ u32 orig_sn, new_metric, orig_metric, last_hop_metric, interval;
bool root_is_gate;
ttl = rann->rann_ttl;
@@ -799,7 +799,7 @@
interval = le32_to_cpu(rann->rann_interval);
hopcount = rann->rann_hopcount;
hopcount++;
- metric = le32_to_cpu(rann->rann_metric);
+ orig_metric = le32_to_cpu(rann->rann_metric);
/* Ignore our own RANNs */
if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -816,7 +816,10 @@
return;
}
- metric_txsta = airtime_link_metric_get(local, sta);
+ last_hop_metric = airtime_link_metric_get(local, sta);
+ new_metric = orig_metric + last_hop_metric;
+ if (new_metric < orig_metric)
+ new_metric = MAX_METRIC;
mpath = mesh_path_lookup(sdata, orig_addr);
if (!mpath) {
@@ -829,7 +832,7 @@
}
if (!(SN_LT(mpath->sn, orig_sn)) &&
- !(mpath->sn == orig_sn && metric < mpath->rann_metric)) {
+ !(mpath->sn == orig_sn && new_metric < mpath->rann_metric)) {
rcu_read_unlock();
return;
}
@@ -847,7 +850,7 @@
}
mpath->sn = orig_sn;
- mpath->rann_metric = metric + metric_txsta;
+ mpath->rann_metric = new_metric;
mpath->is_root = true;
/* Recording RANNs sender address to send individually
* addressed PREQs destined for root mesh STA */
@@ -867,7 +870,7 @@
mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr,
orig_sn, 0, NULL, 0, broadcast_addr,
hopcount, ttl, interval,
- metric + metric_txsta, 0, sdata);
+ new_metric, 0, sdata);
}
rcu_read_unlock();
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 274c564..1ffd1e1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1244,7 +1244,7 @@
static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
+ struct sta_info *sta,
struct sk_buff *skb)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -1258,10 +1258,13 @@
if (!ieee80211_is_data(hdr->frame_control))
return NULL;
- if (pubsta) {
+ if (sta) {
u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
- txq = pubsta->txq[tid];
+ if (!sta->uploaded)
+ return NULL;
+
+ txq = sta->sta.txq[tid];
} else if (vif) {
txq = vif->txq;
}
@@ -1499,23 +1502,17 @@
struct fq *fq = &local->fq;
struct ieee80211_vif *vif;
struct txq_info *txqi;
- struct ieee80211_sta *pubsta;
if (!local->ops->wake_tx_queue ||
sdata->vif.type == NL80211_IFTYPE_MONITOR)
return false;
- if (sta && sta->uploaded)
- pubsta = &sta->sta;
- else
- pubsta = NULL;
-
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
struct ieee80211_sub_if_data, u.ap);
vif = &sdata->vif;
- txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+ txqi = ieee80211_get_txq(local, vif, sta, skb);
if (!txqi)
return false;
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index efa3f48..73e8f34 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -293,7 +293,8 @@
return RX_DROP_UNUSABLE;
ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
/* remove ICV */
- if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
+ if (!(status->flag & RX_FLAG_ICV_STRIPPED) &&
+ pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
return RX_DROP_UNUSABLE;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 5c71d60..caa5986 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -295,7 +295,8 @@
return RX_DROP_UNUSABLE;
/* Trim ICV */
- skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
+ if (!(status->flag & RX_FLAG_ICV_STRIPPED))
+ skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
/* Remove IV */
memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1309e2c..c5a5a69 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -937,6 +937,8 @@
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
+ unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
+ unsigned int alive;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -946,9 +948,11 @@
if (!rt)
continue;
+ alive = 0;
change_nexthops(rt) {
if (rtnl_dereference(nh->nh_dev) != dev)
- continue;
+ goto next;
+
switch (event) {
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
@@ -956,13 +960,16 @@
/* fall through */
case NETDEV_CHANGE:
nh->nh_flags |= RTNH_F_LINKDOWN;
- if (event != NETDEV_UNREGISTER)
- ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
break;
}
if (event == NETDEV_UNREGISTER)
RCU_INIT_POINTER(nh->nh_dev, NULL);
+next:
+ if (!(nh->nh_flags & nh_flags))
+ alive++;
} endfor_nexthops(rt);
+
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
}
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af03..d869ea5 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -364,6 +364,11 @@
ret = nf_queue(skb, state, &entry, verdict);
if (ret == 1 && entry)
goto next_hook;
+ } else {
+ /* Implicit handling for NF_STOLEN, as well as any other
+ * non conventional verdicts.
+ */
+ ret = 0;
}
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a6e44ef..2155c24 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2040,12 +2040,16 @@
seq_puts(seq,
" -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
} else {
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
+ if (svc->ipvs != ipvs)
+ return 0;
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 750b8bf..8da893c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1023,7 +1023,7 @@
next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
- queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+ queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
@@ -1922,7 +1922,7 @@
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+ queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index b1fcfa0..3f49912 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
+#include <linux/capability.h>
#include <net/netlink.h>
#include <net/sock.h>
@@ -32,6 +33,13 @@
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
+struct nfnl_cthelper {
+ struct list_head list;
+ struct nf_conntrack_helper helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
static int
nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -205,18 +213,20 @@
struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_helper *helper;
+ struct nfnl_cthelper *nfcth;
int ret;
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
return -EINVAL;
- helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
- if (helper == NULL)
+ nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+ if (nfcth == NULL)
return -ENOMEM;
+ helper = &nfcth->helper;
ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
- goto err;
+ goto err1;
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -247,12 +257,98 @@
ret = nf_conntrack_helper_register(helper);
if (ret < 0)
- goto err;
+ goto err2;
+
+ list_add_tail(&nfcth->list, &nfnl_cthelper_list);
+ return 0;
+err2:
+ kfree(helper->expect_policy);
+err1:
+ kfree(nfcth);
+ return ret;
+}
+
+static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+ struct nf_conntrack_expect_policy *new_policy,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_NAME] ||
+ !tb[NFCTH_POLICY_EXPECT_MAX] ||
+ !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+ return -EINVAL;
+
+ if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+ return -EBUSY;
+
+ new_policy->max_expected =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ new_policy->timeout =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
return 0;
-err:
- kfree(helper);
- return ret;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+ struct nf_conntrack_helper *helper)
+{
+ struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+ struct nf_conntrack_expect_policy *policy;
+ int i, err;
+
+ /* Check first that all policy attributes are well-formed, so we don't
+ * leave things in inconsistent state on errors.
+ */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+ if (!tb[NFCTH_POLICY_SET + i])
+ return -EINVAL;
+
+ err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+ &new_policy[i],
+ tb[NFCTH_POLICY_SET + i]);
+ if (err < 0)
+ return err;
+ }
+ /* Now we can safely update them. */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+ policy = (struct nf_conntrack_expect_policy *)
+ &helper->expect_policy[i];
+ policy->max_expected = new_policy->max_expected;
+ policy->timeout = new_policy->timeout;
+ }
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+ unsigned int class_max;
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+ nfnl_cthelper_expect_policy_set);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_SET_NUM])
+ return -EINVAL;
+
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (helper->expect_class_max + 1 != class_max)
+ return -EBUSY;
+
+ return nfnl_cthelper_update_policy_all(tb, helper);
}
static int
@@ -265,8 +361,7 @@
return -EBUSY;
if (tb[NFCTH_POLICY]) {
- ret = nfnl_cthelper_parse_expect_policy(helper,
- tb[NFCTH_POLICY]);
+ ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
return ret;
}
@@ -295,7 +390,11 @@
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
struct nf_conntrack_tuple tuple;
- int ret = 0, i;
+ struct nfnl_cthelper *nlcth;
+ int ret = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -306,31 +405,22 @@
if (ret < 0)
return ret;
- rcu_read_lock();
- for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0)
- continue;
+ if ((tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if ((tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- ret = -EEXIST;
- goto err;
- }
- helper = cur;
- break;
- }
+ helper = cur;
+ break;
}
- rcu_read_unlock();
if (helper == NULL)
ret = nfnl_cthelper_create(tb, &tuple);
@@ -338,9 +428,6 @@
ret = nfnl_cthelper_update(tb, helper);
return ret;
-err:
- rcu_read_unlock();
- return ret;
}
static int
@@ -504,13 +591,17 @@
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const tb[])
{
- int ret = -ENOENT, i;
+ int ret = -ENOENT;
struct nf_conntrack_helper *cur;
struct sk_buff *skb2;
char *helper_name = NULL;
struct nf_conntrack_tuple tuple;
+ struct nfnl_cthelper *nlcth;
bool tuple_set = false;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_cthelper_dump_table,
@@ -529,45 +620,39 @@
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
-
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
- ret = -ENOMEM;
- break;
- }
-
- ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
- NFNL_MSG_CTHELPER_NEW, cur);
- if (ret <= 0) {
- kfree_skb(skb2);
- break;
- }
-
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
-
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
}
+
+ ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_CTHELPER_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
+
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
+
+ /* this avoids a loop in nfnetlink. */
+ return ret == -EAGAIN ? -ENOBUFS : ret;
}
return ret;
}
@@ -578,10 +663,13 @@
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
struct nf_conntrack_tuple tuple;
bool tuple_set = false, found = false;
- int i, j = 0, ret;
+ struct nfnl_cthelper *nlcth, *n;
+ int j = 0, ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
@@ -594,28 +682,27 @@
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ j++;
- j++;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- found = true;
- nf_conntrack_helper_unregister(cur);
- }
+ found = true;
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+
+ list_del(&nlcth->list);
+ kfree(nlcth);
}
+
/* Make sure we return success if we flush and there is no helpers */
return (found || j == 0) ? 0 : -ENOENT;
}
@@ -664,20 +751,16 @@
static void __exit nfnl_cthelper_exit(void)
{
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
- int i;
+ struct nfnl_cthelper *nlcth, *n;
nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
- for (i=0; i<nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- nf_conntrack_helper_unregister(cur);
- }
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+ kfree(nlcth);
}
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index af832c5..5efb402 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
- return NULL;
+ goto nlmsg_failure;
}
nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
- return NULL;
+ goto nlmsg_failure;
}
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@
}
nlh->nlmsg_len = skb->len;
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return skb;
nla_put_failure:
skb_tx_error(entskb);
kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return NULL;
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 2455b69..b589a62 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/capability.h>
#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -69,6 +70,9 @@
struct xt_osf_finger *kf = NULL, *sf;
int err = 0;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
@@ -113,6 +117,9 @@
struct xt_osf_finger *sf;
int err = -ENOENT;
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
index cd8309c..3dfa5a0 100644
--- a/net/netfilter/xt_qtaguid.c
+++ b/net/netfilter/xt_qtaguid.c
@@ -519,13 +519,11 @@
DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
full_tag);
- spin_lock_bh(&uid_tag_data_tree_lock);
tr_entry = lookup_tag_ref(full_tag, &utd_entry);
BUG_ON(IS_ERR_OR_NULL(utd_entry));
if (!tr_entry)
tr_entry = new_tag_ref(full_tag, utd_entry);
- spin_unlock_bh(&uid_tag_data_tree_lock);
if (utd_res)
*utd_res = utd_entry;
DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
@@ -2027,6 +2025,7 @@
/* Delete socket tags */
spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
node = rb_first(&sock_tag_tree);
while (node) {
st_entry = rb_entry(node, struct sock_tag, sock_node);
@@ -2056,6 +2055,7 @@
list_del(&st_entry->list);
}
}
+ spin_unlock_bh(&uid_tag_data_tree_lock);
spin_unlock_bh(&sock_tag_list_lock);
sock_tag_tree_erase(&st_to_free_tree);
@@ -2265,10 +2265,12 @@
full_tag = combine_atag_with_uid(acct_tag, uid_int);
spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
sock_tag_entry = get_sock_stat_nl(el_socket->sk);
tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
if (IS_ERR(tag_ref_entry)) {
res = PTR_ERR(tag_ref_entry);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
spin_unlock_bh(&sock_tag_list_lock);
goto err_put;
}
@@ -2295,9 +2297,14 @@
pr_err("qtaguid: ctrl_tag(%s): "
"socket tag alloc failed\n",
input);
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ tag_ref_entry->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tag_ref_entry,
+ uid_tag_data_entry);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
spin_unlock_bh(&sock_tag_list_lock);
res = -ENOMEM;
- goto err_tag_unref_put;
+ goto err_put;
}
/*
* Hold the sk refcount here to make sure the sk pointer cannot
@@ -2307,7 +2314,6 @@
sock_tag_entry->sk = el_socket->sk;
sock_tag_entry->pid = current->tgid;
sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
- spin_lock_bh(&uid_tag_data_tree_lock);
pqd_entry = proc_qtu_data_tree_search(
&proc_qtu_data_tree, current->tgid);
/*
@@ -2325,11 +2331,11 @@
else
list_add(&sock_tag_entry->list,
&pqd_entry->sock_tag_list);
- spin_unlock_bh(&uid_tag_data_tree_lock);
sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
atomic64_inc(&qtu_events.sockets_tagged);
}
+ spin_unlock_bh(&uid_tag_data_tree_lock);
spin_unlock_bh(&sock_tag_list_lock);
/* We keep the ref to the sk until it is untagged */
CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->sk_refcnt=%d\n",
@@ -2338,10 +2344,6 @@
sockfd_put(el_socket);
return 0;
-err_tag_unref_put:
- BUG_ON(tag_ref_entry->num_sock_tags <= 0);
- tag_ref_entry->num_sock_tags--;
- free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
err_put:
CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->sk_refcnt=%d\n",
input, atomic_read(&el_socket->sk->sk_refcnt) - 1);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c9fac08..e1c123d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -96,6 +96,44 @@
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
+static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];
+
+static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
+ "nlk_cb_mutex-ROUTE",
+ "nlk_cb_mutex-1",
+ "nlk_cb_mutex-USERSOCK",
+ "nlk_cb_mutex-FIREWALL",
+ "nlk_cb_mutex-SOCK_DIAG",
+ "nlk_cb_mutex-NFLOG",
+ "nlk_cb_mutex-XFRM",
+ "nlk_cb_mutex-SELINUX",
+ "nlk_cb_mutex-ISCSI",
+ "nlk_cb_mutex-AUDIT",
+ "nlk_cb_mutex-FIB_LOOKUP",
+ "nlk_cb_mutex-CONNECTOR",
+ "nlk_cb_mutex-NETFILTER",
+ "nlk_cb_mutex-IP6_FW",
+ "nlk_cb_mutex-DNRTMSG",
+ "nlk_cb_mutex-KOBJECT_UEVENT",
+ "nlk_cb_mutex-GENERIC",
+ "nlk_cb_mutex-17",
+ "nlk_cb_mutex-SCSITRANSPORT",
+ "nlk_cb_mutex-ECRYPTFS",
+ "nlk_cb_mutex-RDMA",
+ "nlk_cb_mutex-CRYPTO",
+ "nlk_cb_mutex-SMC",
+ "nlk_cb_mutex-23",
+ "nlk_cb_mutex-24",
+ "nlk_cb_mutex-25",
+ "nlk_cb_mutex-26",
+ "nlk_cb_mutex-27",
+ "nlk_cb_mutex-28",
+ "nlk_cb_mutex-29",
+ "nlk_cb_mutex-30",
+ "nlk_cb_mutex-31",
+ "nlk_cb_mutex-MAX_LINKS"
+};
+
static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);
@@ -223,6 +261,9 @@
struct sock *sk = skb->sk;
int ret = -ENOMEM;
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return 0;
+
dev_hold(dev);
if (is_vmalloc_addr(skb->head))
@@ -585,6 +626,9 @@
} else {
nlk->cb_mutex = &nlk->cb_def_mutex;
mutex_init(nlk->cb_mutex);
+ lockdep_set_class_and_name(nlk->cb_mutex,
+ nlk_cb_mutex_keys + protocol,
+ nlk_cb_mutex_key_strings[protocol]);
}
init_waitqueue_head(&nlk->wait);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 0792541..1668916 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1789,14 +1789,11 @@
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
- return ERR_PTR(-EINVAL);
- }
+ WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@@ -1869,12 +1866,15 @@
new_acts_size = ksize(*sfa) * 2;
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+ if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ OVS_NLERR(log, "Flow action size exceeds max %u",
+ MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
+ }
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = nla_alloc_flow_actions(new_acts_size, log);
+ acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return (void *)acts;
@@ -2500,7 +2500,7 @@
{
int err;
- *sfa = nla_alloc_flow_actions(nla_len(attr), log);
+ *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e7f6657..267db0d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1661,7 +1661,6 @@
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
- po->rollover = rollover;
}
match = NULL;
@@ -1706,6 +1705,8 @@
if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
+ po->rollover = rollover;
+ rollover = NULL;
atomic_inc(&match->sk_ref);
__fanout_link(sk, po);
err = 0;
@@ -1719,10 +1720,7 @@
}
out:
- if (err && rollover) {
- kfree_rcu(rollover, rcu);
- po->rollover = NULL;
- }
+ kfree(rollover);
mutex_unlock(&fanout_mutex);
return err;
}
@@ -1746,11 +1744,6 @@
list_del(&f->list);
else
f = NULL;
-
- if (po->rollover) {
- kfree_rcu(po->rollover, rcu);
- po->rollover = NULL;
- }
}
mutex_unlock(&fanout_mutex);
@@ -3039,6 +3032,7 @@
synchronize_net();
if (f) {
+ kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -3107,6 +3101,10 @@
if (need_rehook) {
if (po->running) {
rcu_read_unlock();
+ /* prevents packet_notifier() from calling
+ * register_prot_hook()
+ */
+ po->num = 0;
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3115,6 +3113,7 @@
dev->ifindex);
}
+ BUG_ON(po->running);
po->num = proto;
po->prot_hook.type = proto;
@@ -3853,7 +3852,6 @@
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
- struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3932,18 +3930,13 @@
0);
break;
case PACKET_ROLLOVER_STATS:
- rcu_read_lock();
- rollover = rcu_dereference(po->rollover);
- if (rollover) {
- rstats.tp_all = atomic_long_read(&rollover->num);
- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
- }
- rcu_read_unlock();
- if (!rollover)
+ if (!po->rollover)
return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 9ee4631..d55bfc3 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -92,7 +92,6 @@
struct packet_rollover {
int sock;
- struct rcu_head rcu;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c985ecb..ae5ac17 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -252,7 +252,7 @@
const int pkt_len = 20;
struct qrtr_hdr *hdr;
struct sk_buff *skb;
- u32 *buf;
+ __le32 *buf;
skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
@@ -269,7 +269,7 @@
hdr->dst_node_id = cpu_to_le32(dst_node);
hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
- buf = (u32 *)skb_put(skb, pkt_len);
+ buf = (__le32 *)skb_put(skb, pkt_len);
memset(buf, 0, pkt_len);
buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
buf[1] = cpu_to_le32(src_node);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 60e90f7..f6027f4 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -183,7 +183,7 @@
long i;
int ret;
- if (rs->rs_bound_addr == 0) {
+ if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
@@ -524,6 +524,9 @@
local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+ if (args->nr_local == 0)
+ return -EINVAL;
+
/* figure out the number of pages in the vector */
for (i = 0; i < args->nr_local; i++) {
if (copy_from_user(&vec, &local_vec[i],
@@ -873,6 +876,7 @@
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
diff --git a/net/rds/send.c b/net/rds/send.c
index ad247dc..ef53d164 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1006,6 +1006,9 @@
continue;
if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ if (cmsg->cmsg_len <
+ CMSG_LEN(sizeof(struct rds_rdma_args)))
+ return -EINVAL;
args = CMSG_DATA(cmsg);
*rdma_bytes += args->remote_vec.bytes;
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 20e2923..78f976d3 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -478,9 +478,10 @@
* we do need to clean up the listen socket here.
*/
if (rtn->rds_tcp_listen_sock) {
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
+
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
}
}
@@ -517,10 +518,10 @@
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
@@ -540,8 +541,12 @@
void *rds_tcp_listen_sock_def_readable(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+ if (!lsock)
+ return NULL;
+
+ return lsock->sk->sk_user_data;
}
static int rds_tcp_dev_event(struct notifier_block *this,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 9a1cc89..56ea662 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -66,7 +66,7 @@
/* tcp_listen.c */
struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 525b624..185a56b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -227,6 +227,9 @@
* before it has been accepted and the accepter has set up their
* data_ready.. we only want to queue listen work for our listening
* socket
+ *
+ * (*ready)() may be null if we are racing with netns delete, and
+ * the listen socket is being torn down.
*/
if (sk->sk_state == TCP_LISTEN)
rds_tcp_accept_work(sk);
@@ -235,7 +238,8 @@
out:
read_unlock_bh(&sk->sk_callback_lock);
- ready(sk);
+ if (ready)
+ ready(sk);
}
struct socket *rds_tcp_listen_init(struct net *net)
@@ -275,7 +279,7 @@
return NULL;
}
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
{
struct sock *sk;
@@ -296,5 +300,6 @@
/* wait for accepts to stop and close the socket */
flush_workqueue(rds_wq);
+ flush_work(acceptor);
sock_release(sock);
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 3f9d8d7..b099b64 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -275,6 +275,10 @@
rxrpc_conn_retransmit_call(conn, skb);
return 0;
+ case RXRPC_PACKET_TYPE_BUSY:
+ /* Just ignore BUSY packets for now. */
+ return 0;
+
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&wtmp, sizeof(wtmp)) < 0)
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 44fb8d8..1060d14 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -649,6 +649,7 @@
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
+ bool wake = false;
u32 rwind = ntohl(ackinfo->rwind);
_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -656,9 +657,14 @@
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
- call->tx_winsize = rwind;
+ if (call->tx_winsize != rwind) {
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (rwind > call->tx_winsize)
+ wake = true;
+ call->tx_winsize = rwind;
+ }
+
if (call->cong_ssthresh > rwind)
call->cong_ssthresh = rwind;
@@ -672,6 +678,9 @@
spin_unlock_bh(&peer->lock);
_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
}
+
+ if (wake)
+ wake_up(&call->waitq);
}
/*
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e0aa30f..9617b42 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -161,7 +161,7 @@
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6b07fba..fc3650b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -211,7 +211,7 @@
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- tm->lastuse = lastuse;
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ae83c3ae..da574a1 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -496,6 +496,7 @@
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
u32 flags)
{
+ struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_u32_offload u32_offload = {0};
struct tc_to_netdev offload;
@@ -520,7 +521,7 @@
offload.cls_u32->knode.sel = &n->sel;
offload.cls_u32->knode.exts = &n->exts;
if (n->ht_down)
- offload.cls_u32->knode.link_handle = n->ht_down->handle;
+ offload.cls_u32->knode.link_handle = ht->handle;
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
tp->protocol, &offload);
@@ -788,8 +789,9 @@
static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
struct tc_u_knode *n)
{
- struct tc_u_knode *new;
+ struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
struct tc_u32_sel *s = &n->sel;
+ struct tc_u_knode *new;
new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
GFP_KERNEL);
@@ -807,11 +809,11 @@
new->fshift = n->fshift;
new->res = n->res;
new->flags = n->flags;
- RCU_INIT_POINTER(new->ht_down, n->ht_down);
+ RCU_INIT_POINTER(new->ht_down, ht);
/* bump reference count as long as we hold pointer to structure */
- if (new->ht_down)
- new->ht_down->refcnt++;
+ if (ht)
+ ht->refcnt++;
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1308bbf..b56d579 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -200,9 +200,13 @@
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
+ int wlen = skb_network_offset(skb);
+
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- if (skb_cow_head(skb, sizeof(struct iphdr)))
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
@@ -210,7 +214,9 @@
break;
case htons(ETH_P_IPV6):
- if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
goto drop;
skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 95d7b15..e371a0d 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -166,7 +166,7 @@
/* Lookup timer debug name. */
const char *sctp_tname(const sctp_subtype_t id)
{
- if (id.timeout <= SCTP_EVENT_TIMEOUT_MAX)
+ if (id.timeout < ARRAY_SIZE(sctp_timer_tbl))
return sctp_timer_tbl[id.timeout];
return "unknown_timer";
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 4f5a2b5..6300f28 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -44,6 +44,9 @@
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5825853..0994ce4 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -382,17 +382,18 @@
}
static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
- struct sctp_sndrcvinfo *sinfo,
- struct list_head *queue, int msg_len)
+ struct sctp_sndrcvinfo *sinfo, int msg_len)
{
+ struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
- list_for_each_entry_safe(chk, temp, queue, list) {
+ list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->list);
+ q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
@@ -431,9 +432,7 @@
return;
}
- sctp_prsctp_prune_unsent(asoc, sinfo,
- &asoc->outqueue.out_chunk_list,
- msg_len);
+ sctp_prsctp_prune_unsent(asoc, sinfo, msg_len);
}
/* Mark all the eligible packets on a transport for retransmission. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c062cea..c472b83 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -82,7 +82,7 @@
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
@@ -332,16 +332,14 @@
if (len < sizeof (struct sockaddr))
return NULL;
+ if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+ return NULL;
+
/* V4 mapped address are really of AF_INET family */
if (addr->sa.sa_family == AF_INET6 &&
- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
- if (!opt->pf->af_supported(AF_INET, opt))
- return NULL;
- } else {
- /* Does this PF support this AF? */
- if (!opt->pf->af_supported(addr->sa.sa_family, opt))
- return NULL;
- }
+ ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+ !opt->pf->af_supported(AF_INET, opt))
+ return NULL;
/* If we get this far, af is valid. */
af = sctp_get_af_specific(addr->sa.sa_family);
@@ -1957,9 +1955,16 @@
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
+ /* sk can be changed by peel off when waiting for buf. */
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -4239,7 +4244,7 @@
SCTP_DBG_OBJCNT_INC(sock);
local_bh_disable();
- percpu_counter_inc(&sctp_sockets_allocated);
+ sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
/* Nothing can fail after this block, otherwise
@@ -4283,7 +4288,7 @@
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
- percpu_counter_dec(&sctp_sockets_allocated);
+ sk_sockets_allocated_dec(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
}
@@ -4771,12 +4776,6 @@
if (!asoc)
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
-
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -7443,9 +7442,9 @@
size_t msg_len)
{
struct sock *sk = asoc->base.sk;
- int err = 0;
long current_timeo = *timeo_p;
DEFINE_WAIT(wait);
+ int err = 0;
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
@@ -7457,10 +7456,11 @@
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -7473,6 +7473,8 @@
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk)
+ goto do_error;
*timeo_p = current_timeo;
}
@@ -7485,6 +7487,10 @@
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
diff --git a/net/socket.c b/net/socket.c
index 40beb5a..34dabf4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1716,6 +1716,7 @@
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
msg.msg_iocb = NULL;
+ msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
@@ -2561,6 +2562,15 @@
core_initcall(sock_init); /* early initcall */
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ bpf_jit_enable = 1;
+#endif
+ return 0;
+}
+pure_initcall(jit_init);
+
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 25d9a9c..624c322 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@
goto out_free_groups;
creds->cr_group_info->gid[i] = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1530825..6a08bc4 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@
goto out;
rsci.cred.cr_group_info->gid[i] = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b3..600eacc 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -274,10 +274,9 @@
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 64af4f0..738a243 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@
ug.gi->gid[i] = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -819,6 +820,7 @@
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
cred->cr_group_info->gid[i] = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e01c825..d24d14e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2381,6 +2381,7 @@
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
+ case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
/* retry with existing socket, after a delay */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2775332..5b3e1ea 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1848,36 +1848,38 @@
if (strcmp(name, tipc_bclink_name) == 0) {
err = tipc_nl_add_bc_link(net, &msg);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
} else {
int bearer_id;
struct tipc_node *node;
struct tipc_link *link;
node = tipc_node_find_by_name(net, name, &bearer_id);
- if (!node)
- return -EINVAL;
+ if (!node) {
+ err = -EINVAL;
+ goto err_free;
+ }
tipc_node_read_lock(node);
link = node->links[bearer_id].link;
if (!link) {
tipc_node_read_unlock(node);
- nlmsg_free(msg.skb);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_free;
}
err = __tipc_nl_add_link(net, &msg, link, 0);
tipc_node_read_unlock(node);
- if (err) {
- nlmsg_free(msg.skb);
- return err;
- }
+ if (err)
+ goto err_free;
}
return genlmsg_reply(msg.skb, info);
+
+err_free:
+ nlmsg_free(msg.skb);
+ return err;
}
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index f89c0c2..f4c1b18 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -86,7 +86,6 @@
static void tipc_recv_work(struct work_struct *work);
static void tipc_send_work(struct work_struct *work);
static void tipc_clean_outqueues(struct tipc_conn *con);
-static void tipc_sock_release(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
@@ -104,7 +103,6 @@
}
saddr->scope = -TIPC_NODE_SCOPE;
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
- tipc_sock_release(con);
sock_release(sock);
con->sock = NULL;
@@ -194,19 +192,15 @@
write_unlock_bh(&sk->sk_callback_lock);
}
-static void tipc_sock_release(struct tipc_conn *con)
+static void tipc_close_conn(struct tipc_conn *con)
{
struct tipc_server *s = con->server;
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
-
- tipc_unregister_callbacks(con);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
-{
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+ tipc_unregister_callbacks(con);
+
+ if (con->conid)
+ s->tipc_conn_release(con->conid, con->usr_data);
/* We shouldn't flush pending works as we may be in the
* thread. In fact the races with pending rx/tx work structs
@@ -319,6 +313,7 @@
newcon->usr_data = s->tipc_conn_new(newcon->conid);
if (!newcon->usr_data) {
sock_release(newsock);
+ conn_put(newcon);
return -ENOMEM;
}
@@ -625,14 +620,12 @@
void tipc_server_stop(struct tipc_server *s)
{
struct tipc_conn *con;
- int total = 0;
int id;
spin_lock_bh(&s->idr_lock);
- for (id = 0; total < s->idr_in_use; id++) {
+ for (id = 0; s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
if (con) {
- total++;
spin_unlock_bh(&s->idr_lock);
tipc_close_conn(con);
spin_lock_bh(&s->idr_lock);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 9d94e65..271cd66 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -141,6 +141,11 @@
static void tipc_subscrp_timeout(unsigned long data)
{
struct tipc_subscription *sub = (struct tipc_subscription *)data;
+ struct tipc_subscriber *subscriber = sub->subscriber;
+
+ spin_lock_bh(&subscriber->lock);
+ tipc_nametbl_unsubscribe(sub);
+ spin_unlock_bh(&subscriber->lock);
/* Notify subscriber of timeout */
tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
@@ -173,7 +178,6 @@
struct tipc_subscriber *subscriber = sub->subscriber;
spin_lock_bh(&subscriber->lock);
- tipc_nametbl_unsubscribe(sub);
list_del(&sub->subscrp_list);
atomic_dec(&tn->subscription_count);
spin_unlock_bh(&subscriber->lock);
@@ -205,6 +209,7 @@
if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
continue;
+ tipc_nametbl_unsubscribe(sub);
tipc_subscrp_get(sub);
spin_unlock_bh(&subscriber->lock);
tipc_subscrp_delete(sub);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index b58dc95..107375d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -371,10 +371,6 @@
goto rcu_out;
}
- tipc_rcv(sock_net(sk), skb, b);
- rcu_read_unlock();
- return 0;
-
rcu_out:
rcu_read_unlock();
out:
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2f633ee..ee12e17 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1101,10 +1101,19 @@
.sendpage = sock_no_sendpage,
};
+static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
+{
+ if (!transport->cancel_pkt)
+ return -EOPNOTSUPP;
+
+ return transport->cancel_pkt(vsk);
+}
+
static void vsock_connect_timeout(struct work_struct *work)
{
struct sock *sk;
struct vsock_sock *vsk;
+ int cancel = 0;
vsk = container_of(work, struct vsock_sock, dwork.work);
sk = sk_vsock(vsk);
@@ -1115,8 +1124,11 @@
sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
+ cancel = 1;
}
release_sock(sk);
+ if (cancel)
+ vsock_transport_cancel_pkt(vsk);
sock_put(sk);
}
@@ -1223,11 +1235,13 @@
err = sock_intr_errno(timeout);
sk->sk_state = SS_UNCONNECTED;
sock->state = SS_UNCONNECTED;
+ vsock_transport_cancel_pkt(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
sk->sk_state = SS_UNCONNECTED;
sock->state = SS_UNCONNECTED;
+ vsock_transport_cancel_pkt(vsk);
goto out_wait;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 62c056e..9c07c76 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -57,6 +57,7 @@
pkt->len = len;
pkt->hdr.len = cpu_to_le32(len);
pkt->reply = info->reply;
+ pkt->vsk = info->vsk;
if (info->msg && len > 0) {
pkt->buf = kmalloc(len, GFP_KERNEL);
@@ -180,6 +181,7 @@
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
.type = type,
+ .vsk = vsk,
};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -519,6 +521,7 @@
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_REQUEST,
.type = VIRTIO_VSOCK_TYPE_STREAM,
+ .vsk = vsk,
};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -534,6 +537,7 @@
VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
(mode & SEND_SHUTDOWN ?
VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
+ .vsk = vsk,
};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -560,6 +564,7 @@
.type = VIRTIO_VSOCK_TYPE_STREAM,
.msg = msg,
.pkt_len = len,
+ .vsk = vsk,
};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -581,6 +586,7 @@
.op = VIRTIO_VSOCK_OP_RST,
.type = VIRTIO_VSOCK_TYPE_STREAM,
.reply = !!pkt,
+ .vsk = vsk,
};
/* Send RST only if the original pkt is not a RST pkt */
@@ -826,6 +832,7 @@
.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
.remote_port = le32_to_cpu(pkt->hdr.src_port),
.reply = true,
+ .vsk = vsk,
};
return virtio_transport_send_pkt_info(vsk, &info);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c626f67..91722e9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -16,6 +16,7 @@
#include <linux/nl80211.h>
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/etherdevice.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
@@ -2014,20 +2015,22 @@
static int parse_txq_params(struct nlattr *tb[],
struct ieee80211_txq_params *txq_params)
{
+ u8 ac;
+
if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
!tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
!tb[NL80211_TXQ_ATTR_AIFS])
return -EINVAL;
- txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
+ ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
- if (txq_params->ac >= NL80211_NUM_ACS)
+ if (ac >= NL80211_NUM_ACS)
return -EINVAL;
-
+ txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
return 0;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index dbb91e0..8b3fef7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1393,6 +1393,7 @@
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2cade02..b09febe 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -872,13 +872,13 @@
&x->replay);
if (ret)
goto out;
- if (x->security)
- ret = copy_sec_ctx(x->security, skb);
if (x->props.output_mark) {
ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
if (ret)
goto out;
}
+ if (x->security)
+ ret = copy_sec_ctx(x->security, skb);
out:
return ret;
}
diff --git a/scripts/coccicheck b/scripts/coccicheck
index ec487b8..c36b04b 100755
--- a/scripts/coccicheck
+++ b/scripts/coccicheck
@@ -29,12 +29,6 @@
VERBOSE=0
fi
-if [ -z "$J" ]; then
- NPROC=$(getconf _NPROCESSORS_ONLN)
-else
- NPROC="$J"
-fi
-
FLAGS="--very-quiet"
# You can use SPFLAGS to append extra arguments to coccicheck or override any
@@ -69,6 +63,9 @@
# Take only the last argument, which is the C file to test
shift $(( $# - 1 ))
OPTIONS="$COCCIINCLUDE $1"
+
+ # No need to parallelize Coccinelle since this mode takes one input file.
+ NPROC=1
else
ONLINE=0
if [ "$KBUILD_EXTMOD" = "" ] ; then
@@ -76,6 +73,12 @@
else
OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE"
fi
+
+ if [ -z "$J" ]; then
+ NPROC=$(getconf _NPROCESSORS_ONLN)
+ else
+ NPROC="$J"
+ fi
fi
if [ "$KBUILD_EXTMOD" != "" ] ; then
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 1bf949c..f6ab3cc 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -96,6 +96,8 @@
thread_info_addr = task.address + ia64_task_size
thread_info = thread_info_addr.cast(thread_info_ptr_type)
else:
+ if task.type.fields()[0].type == thread_info_type.get_type():
+ return task['thread_info']
thread_info = task['stack'].cast(thread_info_ptr_type)
return thread_info.dereference()
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index bd83497..238db4f 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -838,6 +838,7 @@
".cmem*", /* EZchip */
".fmt_slot*", /* EZchip */
".gnu.lto*",
+ ".discard.*",
NULL
};
@@ -2129,6 +2130,14 @@
buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
}
+/* Cannot check for assembler */
+static void add_retpoline(struct buffer *b)
+{
+ buf_printf(b, "\n#ifdef RETPOLINE\n");
+ buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
+ buf_printf(b, "#endif\n");
+}
+
static void add_staging_flag(struct buffer *b, const char *name)
{
static const char *staging_dir = "drivers/staging";
@@ -2473,6 +2482,7 @@
add_header(&buf, mod);
add_intree_flag(&buf, !external_module);
+ add_retpoline(&buf);
add_staging_flag(&buf, mod->name);
err |= add_versions(&buf, mod);
add_depends(&buf, mod, modules);
diff --git a/scripts/module-common.lds b/scripts/module-common.lds
index 73a2c7d..9b6e246 100644
--- a/scripts/module-common.lds
+++ b/scripts/module-common.lds
@@ -4,7 +4,10 @@
* combine them automatically.
*/
SECTIONS {
- /DISCARD/ : { *(.discard) }
+ /DISCARD/ : {
+ *(.discard)
+ *(.discard.*)
+ }
__ksymtab 0 : { *(SORT(___ksymtab+*)) }
__ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) }
@@ -19,4 +22,6 @@
. = ALIGN(8);
.init_array 0 : { *(SORT(.init_array.*)) *(.init_array) }
+
+ __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) }
}
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 71b4a8a..7badec3 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -39,10 +39,9 @@
false; \
fi ; \
$(srctree)/scripts/setlocalversion --save-scmversion; \
-ln -sf $(srctree) $(2); \
tar -cz $(RCS_TAR_IGNORE) -f $(2).tar.gz \
- $(addprefix $(2)/,$(TAR_CONTENT) $(3)); \
-rm -f $(2) $(objtree)/.scmversion
+ --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3); \
+rm -f $(objtree)/.scmversion
# rpm-pkg
# ---------------------------------------------------------------------------
diff --git a/security/Kconfig b/security/Kconfig
index 59aea7d..80a2934 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -40,6 +40,16 @@
If you are unsure how to answer this question, answer N.
+config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on X86_64 && SMP
+ help
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
+
+ If you are unsure how to answer this question, answer Y.
+
config SECURITYFS
bool "Enable the securityfs filesystem"
help
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 0e87629..2b3def1 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -51,6 +51,8 @@
ima_hash_algo = HASH_ALGO_SHA1;
else if (strncmp(str, "md5", 3) == 0)
ima_hash_algo = HASH_ALGO_MD5;
+ else
+ return 1;
goto out;
}
@@ -60,6 +62,8 @@
break;
}
}
+ if (i == HASH_ALGO__LAST)
+ return 1;
out:
hash_setup_done = 1;
return 1;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index ae70e82..1f3d600 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -141,23 +141,22 @@
*/
static int valid_master_desc(const char *new_desc, const char *orig_desc)
{
- if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) {
- if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN)
- goto out;
- if (orig_desc)
- if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN))
- goto out;
- } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) {
- if (strlen(new_desc) == KEY_USER_PREFIX_LEN)
- goto out;
- if (orig_desc)
- if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN))
- goto out;
- } else
- goto out;
+ int prefix_len;
+
+ if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN))
+ prefix_len = KEY_TRUSTED_PREFIX_LEN;
+ else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN))
+ prefix_len = KEY_USER_PREFIX_LEN;
+ else
+ return -EINVAL;
+
+ if (!new_desc[prefix_len])
+ return -EINVAL;
+
+ if (orig_desc && strncmp(new_desc, orig_desc, prefix_len))
+ return -EINVAL;
+
return 0;
-out:
- return -EINVAL;
}
/*
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 5030fcf..cb7f8f7 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -250,11 +250,12 @@
* The keyring selected is returned with an extra reference upon it which the
* caller must release.
*/
-static void construct_get_dest_keyring(struct key **_dest_keyring)
+static int construct_get_dest_keyring(struct key **_dest_keyring)
{
struct request_key_auth *rka;
const struct cred *cred = current_cred();
struct key *dest_keyring = *_dest_keyring, *authkey;
+ int ret;
kenter("%p", dest_keyring);
@@ -263,6 +264,8 @@
/* the caller supplied one */
key_get(dest_keyring);
} else {
+ bool do_perm_check = true;
+
/* use a default keyring; falling through the cases until we
* find one that we actually have */
switch (cred->jit_keyring) {
@@ -277,8 +280,10 @@
dest_keyring =
key_get(rka->dest_keyring);
up_read(&authkey->sem);
- if (dest_keyring)
+ if (dest_keyring) {
+ do_perm_check = false;
break;
+ }
}
case KEY_REQKEY_DEFL_THREAD_KEYRING:
@@ -313,11 +318,29 @@
default:
BUG();
}
+
+ /*
+ * Require Write permission on the keyring. This is essential
+ * because the default keyring may be the session keyring, and
+ * joining a keyring only requires Search permission.
+ *
+ * However, this check is skipped for the "requestor keyring" so
+ * that /sbin/request-key can itself use request_key() to add
+ * keys to the original requestor's destination keyring.
+ */
+ if (dest_keyring && do_perm_check) {
+ ret = key_permission(make_key_ref(dest_keyring, 1),
+ KEY_NEED_WRITE);
+ if (ret) {
+ key_put(dest_keyring);
+ return ret;
+ }
+ }
}
*_dest_keyring = dest_keyring;
kleave(" [dk %d]", key_serial(dest_keyring));
- return;
+ return 0;
}
/*
@@ -443,11 +466,15 @@
if (ctx->index_key.type == &key_type_keyring)
return ERR_PTR(-EPERM);
- user = key_user_lookup(current_fsuid());
- if (!user)
- return ERR_PTR(-ENOMEM);
+ ret = construct_get_dest_keyring(&dest_keyring);
+ if (ret)
+ goto error;
- construct_get_dest_keyring(&dest_keyring);
+ user = key_user_lookup(current_fsuid());
+ if (!user) {
+ ret = -ENOMEM;
+ goto error_put_dest_keyring;
+ }
ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
key_user_put(user);
@@ -462,7 +489,7 @@
} else if (ret == -EINPROGRESS) {
ret = 0;
} else {
- goto couldnt_alloc_key;
+ goto error_put_dest_keyring;
}
key_put(dest_keyring);
@@ -472,8 +499,9 @@
construction_failed:
key_negate_and_link(key, key_negative_timeout, NULL, NULL);
key_put(key);
-couldnt_alloc_key:
+error_put_dest_keyring:
key_put(dest_keyring);
+error:
kleave(" = %d", ret);
return ERR_PTR(ret);
}
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index ebc9fdf..3321348 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -466,7 +466,6 @@
v = snd_pcm_hw_param_last(pcm, params, var, dir);
else
v = snd_pcm_hw_param_first(pcm, params, var, dir);
- snd_BUG_ON(v < 0);
return v;
}
@@ -1370,8 +1369,11 @@
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
tmp = bytes;
if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
@@ -1415,14 +1417,18 @@
xfer += tmp;
if ((substream->f_flags & O_NONBLOCK) != 0 &&
tmp != runtime->oss.period_bytes)
- break;
+ tmp = -EAGAIN;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
@@ -1470,8 +1476,11 @@
if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
return tmp;
- mutex_lock(&runtime->oss.params_lock);
while (bytes > 0) {
+ if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
if (runtime->oss.buffer_used == 0) {
tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
@@ -1502,12 +1511,16 @@
bytes -= tmp;
xfer += tmp;
}
- }
- mutex_unlock(&runtime->oss.params_lock);
- return xfer;
-
err:
- mutex_unlock(&runtime->oss.params_lock);
+ mutex_unlock(&runtime->oss.params_lock);
+ if (tmp < 0)
+ break;
+ if (signal_pending(current)) {
+ tmp = -ERESTARTSYS;
+ break;
+ }
+ tmp = 0;
+ }
return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
}
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index 727ac44..a84a1d3 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -591,18 +591,26 @@
snd_pcm_sframes_t frames = size;
plugin = snd_pcm_plug_first(plug);
- while (plugin && frames > 0) {
+ while (plugin) {
+ if (frames <= 0)
+ return frames;
if ((next = plugin->next) != NULL) {
snd_pcm_sframes_t frames1 = frames;
- if (plugin->dst_frames)
+ if (plugin->dst_frames) {
frames1 = plugin->dst_frames(plugin, frames);
+ if (frames1 <= 0)
+ return frames1;
+ }
if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
return err;
}
if (err != frames1) {
frames = err;
- if (plugin->src_frames)
+ if (plugin->src_frames) {
frames = plugin->src_frames(plugin, frames1);
+ if (frames <= 0)
+ return frames;
+ }
}
} else
dst_channels = NULL;
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 8e980aa..074363b 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -149,7 +149,9 @@
err = -ENXIO;
goto _error;
}
+ mutex_lock(&pcm->open_mutex);
err = snd_pcm_info_user(substream, info);
+ mutex_unlock(&pcm->open_mutex);
_error:
mutex_unlock(®ister_mutex);
return err;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index c80d80e..9306604 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -578,7 +578,6 @@
{
u_int64_t n = (u_int64_t) a * b;
if (c == 0) {
- snd_BUG_ON(!n);
*r = 0;
return UINT_MAX;
}
@@ -1664,7 +1663,7 @@
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
@@ -1711,7 +1710,7 @@
return changed;
if (params->rmask) {
int err = snd_pcm_hw_refine(pcm, params);
- if (snd_BUG_ON(err < 0))
+ if (err < 0)
return err;
}
return snd_pcm_hw_param_value(params, var, dir);
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index b450a27..16f8124 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -579,15 +579,14 @@
return 0;
}
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+ struct snd_rawmidi_info *info)
{
struct snd_rawmidi *rmidi;
struct snd_rawmidi_str *pstr;
struct snd_rawmidi_substream *substream;
- mutex_lock(®ister_mutex);
rmidi = snd_rawmidi_search(card, info->device);
- mutex_unlock(®ister_mutex);
if (!rmidi)
return -ENXIO;
if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@
}
return -ENXIO;
}
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+ int ret;
+
+ mutex_lock(®ister_mutex);
+ ret = __snd_rawmidi_info_select(card, info);
+ mutex_unlock(®ister_mutex);
+ return ret;
+}
EXPORT_SYMBOL(snd_rawmidi_info_select);
static int snd_rawmidi_info_select_user(struct snd_card *card,
diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
index 45ef591..16580a8 100644
--- a/sound/core/seq/seq_clientmgr.c
+++ b/sound/core/seq/seq_clientmgr.c
@@ -221,6 +221,7 @@
rwlock_init(&client->ports_lock);
mutex_init(&client->ports_mutex);
INIT_LIST_HEAD(&client->ports_list_head);
+ mutex_init(&client->ioctl_mutex);
/* find free slot in the client table */
spin_lock_irqsave(&clients_lock, flags);
@@ -2127,7 +2128,9 @@
return -EFAULT;
}
+ mutex_lock(&client->ioctl_mutex);
err = handler->func(client, &buf);
+ mutex_unlock(&client->ioctl_mutex);
if (err >= 0) {
/* Some commands includes a bug in 'dir' field. */
if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT ||
diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h
index c661425..0611e1e 100644
--- a/sound/core/seq/seq_clientmgr.h
+++ b/sound/core/seq/seq_clientmgr.h
@@ -61,6 +61,7 @@
struct list_head ports_list_head;
rwlock_t ports_lock;
struct mutex ports_mutex;
+ struct mutex ioctl_mutex;
int convert32; /* convert 32->64bit */
/* output pool */
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 37d9cfb..b80985f 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -355,7 +355,7 @@
unsigned long freq;
t = tmr->timeri->timer;
- if (snd_BUG_ON(!t))
+ if (!t)
return -EINVAL;
freq = tmr->preferred_resolution;
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index 54f348a..cbd20cb 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -39,6 +39,7 @@
#include <sound/core.h>
#include <sound/control.h>
#include <sound/pcm.h>
+#include <sound/pcm_params.h>
#include <sound/info.h>
#include <sound/initval.h>
@@ -305,19 +306,6 @@
return 0;
}
-static void params_change_substream(struct loopback_pcm *dpcm,
- struct snd_pcm_runtime *runtime)
-{
- struct snd_pcm_runtime *dst_runtime;
-
- if (dpcm == NULL || dpcm->substream == NULL)
- return;
- dst_runtime = dpcm->substream->runtime;
- if (dst_runtime == NULL)
- return;
- dst_runtime->hw = dpcm->cable->hw;
-}
-
static void params_change(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
@@ -329,10 +317,6 @@
cable->hw.rate_max = runtime->rate;
cable->hw.channels_min = runtime->channels;
cable->hw.channels_max = runtime->channels;
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
- runtime);
- params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
- runtime);
}
static int loopback_prepare(struct snd_pcm_substream *substream)
@@ -620,26 +604,29 @@
static int rule_format(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
+ struct snd_mask m;
- struct snd_pcm_hardware *hw = rule->private;
- struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
- maskp->bits[0] &= (u_int32_t)hw->formats;
- maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
- memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
- if (! maskp->bits[0] && ! maskp->bits[1])
- return -EINVAL;
- return 0;
+ snd_mask_none(&m);
+ mutex_lock(&dpcm->loopback->cable_lock);
+ m.bits[0] = (u_int32_t)cable->hw.formats;
+ m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+ mutex_unlock(&dpcm->loopback->cable_lock);
+ return snd_mask_refine(hw_param_mask(params, rule->var), &m);
}
static int rule_rate(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->rate_min;
- t.max = hw->rate_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.rate_min;
+ t.max = cable->hw.rate_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
@@ -648,22 +635,44 @@
static int rule_channels(struct snd_pcm_hw_params *params,
struct snd_pcm_hw_rule *rule)
{
- struct snd_pcm_hardware *hw = rule->private;
+ struct loopback_pcm *dpcm = rule->private;
+ struct loopback_cable *cable = dpcm->cable;
struct snd_interval t;
- t.min = hw->channels_min;
- t.max = hw->channels_max;
+ mutex_lock(&dpcm->loopback->cable_lock);
+ t.min = cable->hw.channels_min;
+ t.max = cable->hw.channels_max;
+ mutex_unlock(&dpcm->loopback->cable_lock);
t.openmin = t.openmax = 0;
t.integer = 0;
return snd_interval_refine(hw_param_interval(params, rule->var), &t);
}
+static void free_cable(struct snd_pcm_substream *substream)
+{
+ struct loopback *loopback = substream->private_data;
+ int dev = get_cable_index(substream);
+ struct loopback_cable *cable;
+
+ cable = loopback->cables[substream->number][dev];
+ if (!cable)
+ return;
+ if (cable->streams[!substream->stream]) {
+ /* other stream is still alive */
+ cable->streams[substream->stream] = NULL;
+ } else {
+ /* free the cable */
+ loopback->cables[substream->number][dev] = NULL;
+ kfree(cable);
+ }
+}
+
static int loopback_open(struct snd_pcm_substream *substream)
{
struct snd_pcm_runtime *runtime = substream->runtime;
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm;
- struct loopback_cable *cable;
+ struct loopback_cable *cable = NULL;
int err = 0;
int dev = get_cable_index(substream);
@@ -682,7 +691,6 @@
if (!cable) {
cable = kzalloc(sizeof(*cable), GFP_KERNEL);
if (!cable) {
- kfree(dpcm);
err = -ENOMEM;
goto unlock;
}
@@ -700,19 +708,19 @@
/* are cached -> they do not reflect the actual state */
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_FORMAT,
- rule_format, &runtime->hw,
+ rule_format, dpcm,
SNDRV_PCM_HW_PARAM_FORMAT, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_RATE,
- rule_rate, &runtime->hw,
+ rule_rate, dpcm,
SNDRV_PCM_HW_PARAM_RATE, -1);
if (err < 0)
goto unlock;
err = snd_pcm_hw_rule_add(runtime, 0,
SNDRV_PCM_HW_PARAM_CHANNELS,
- rule_channels, &runtime->hw,
+ rule_channels, dpcm,
SNDRV_PCM_HW_PARAM_CHANNELS, -1);
if (err < 0)
goto unlock;
@@ -724,6 +732,10 @@
else
runtime->hw = cable->hw;
unlock:
+ if (err < 0) {
+ free_cable(substream);
+ kfree(dpcm);
+ }
mutex_unlock(&loopback->cable_lock);
return err;
}
@@ -732,20 +744,10 @@
{
struct loopback *loopback = substream->private_data;
struct loopback_pcm *dpcm = substream->runtime->private_data;
- struct loopback_cable *cable;
- int dev = get_cable_index(substream);
loopback_timer_stop(dpcm);
mutex_lock(&loopback->cable_lock);
- cable = loopback->cables[substream->number][dev];
- if (cable->streams[!substream->stream]) {
- /* other stream is still alive */
- cable->streams[substream->stream] = NULL;
- } else {
- /* free the cable */
- loopback->cables[substream->number][dev] = NULL;
- kfree(cable);
- }
+ free_cable(substream);
mutex_unlock(&loopback->cable_lock);
return 0;
}
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index c9af022..47c3e97 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -319,7 +319,7 @@
*/
int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
{
- if (WARN_ON(!hdac_acomp))
+ if (!hdac_acomp)
return -ENODEV;
hdac_acomp->audio_ops = aops;
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 80bbadc..d6e079f 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -408,6 +408,7 @@
/*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/
/* codec SSID */
+ SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122),
SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81),
SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122),
SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101),
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index f2e4e99..2c3065c 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -261,6 +261,7 @@
CXT_FIXUP_HP_530,
CXT_FIXUP_CAP_MIX_AMP_5047,
CXT_FIXUP_MUTE_LED_EAPD,
+ CXT_FIXUP_HP_DOCK,
CXT_FIXUP_HP_SPECTRE,
CXT_FIXUP_HP_GATE_MIC,
};
@@ -778,6 +779,14 @@
.type = HDA_FIXUP_FUNC,
.v.func = cxt_fixup_mute_led_eapd,
},
+ [CXT_FIXUP_HP_DOCK] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x16, 0x21011020 }, /* line-out */
+ { 0x18, 0x2181103f }, /* line-in */
+ { }
+ }
+ },
[CXT_FIXUP_HP_SPECTRE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -839,6 +848,7 @@
SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC),
SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC),
+ SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE),
SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
@@ -872,6 +882,7 @@
{ .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" },
{ .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" },
{ .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
+ { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
{}
};
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index d7fa737..71a058f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4854,6 +4854,7 @@
ALC286_FIXUP_HP_GPIO_LED,
ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
ALC280_FIXUP_HP_DOCK_PINS,
+ ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED,
ALC280_FIXUP_HP_9480M,
ALC288_FIXUP_DELL_HEADSET_MODE,
ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -5394,6 +5395,16 @@
.chained = true,
.chain_id = ALC280_FIXUP_HP_GPIO4
},
+ [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x1b, 0x21011020 }, /* line-out */
+ { 0x18, 0x2181103f }, /* line-in */
+ { },
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED
+ },
[ALC280_FIXUP_HP_9480M] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc280_fixup_hp_9480m,
@@ -5606,6 +5617,7 @@
SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
+ SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5646,7 +5658,7 @@
SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
- SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED),
+ SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED),
SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
@@ -5812,6 +5824,7 @@
{.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"},
{.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
{.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
+ {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"},
{.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
{.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
{.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
@@ -5960,6 +5973,11 @@
{0x1b, 0x01011020},
{0x21, 0x02211010}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x1b, 0x01011020},
+ {0x21, 0x0221101f}),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x12, 0x90a60160},
{0x14, 0x90170120},
{0x21, 0x02211030}),
diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c
index c69e976..f886324 100644
--- a/sound/soc/codecs/da7218.c
+++ b/sound/soc/codecs/da7218.c
@@ -2519,7 +2519,7 @@
}
if (da7218->dev_id == DA7218_DEV_ID) {
- hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+ hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
if (!hpldet_np)
return pdata;
diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
index 712ed65..ebdf9bd 100644
--- a/sound/soc/codecs/pcm512x-spi.c
+++ b/sound/soc/codecs/pcm512x-spi.c
@@ -70,3 +70,7 @@
};
module_spi_driver(pcm512x_spi_driver);
+
+MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI");
+MODULE_AUTHOR("Mark Brown <broonie@kernel.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h
index 5acd5b6..f9b6c5a 100644
--- a/sound/soc/codecs/tlv320aic31xx.h
+++ b/sound/soc/codecs/tlv320aic31xx.h
@@ -115,7 +115,7 @@
/* INT2 interrupt control */
#define AIC31XX_INT2CTRL AIC31XX_REG(0, 49)
/* GPIO1 control */
-#define AIC31XX_GPIO1 AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1 AIC31XX_REG(0, 51)
#define AIC31XX_DACPRB AIC31XX_REG(0, 60)
/* ADC Instruction Set Register */
diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c
index a2104d6..26fd6a6 100644
--- a/sound/soc/codecs/twl4030.c
+++ b/sound/soc/codecs/twl4030.c
@@ -232,7 +232,7 @@
struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
struct device_node *twl4030_codec_node = NULL;
- twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+ twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
"codec");
if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@
GFP_KERNEL);
if (!pdata) {
dev_err(codec->dev, "Can not allocate memory\n");
+ of_node_put(twl4030_codec_node);
return NULL;
}
twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+ of_node_put(twl4030_codec_node);
}
return pdata;
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 757af79..c03c9da 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1465,7 +1465,7 @@
le64_to_cpu(footer->timestamp));
while (pos < firmware->size &&
- pos - firmware->size > sizeof(*region)) {
+ sizeof(*region) < firmware->size - pos) {
region = (void *)&(firmware->data[pos]);
region_name = "Unknown";
reg = 0;
@@ -1526,8 +1526,8 @@
regions, le32_to_cpu(region->len), offset,
region_name);
- if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
- firmware->size) {
+ if (le32_to_cpu(region->len) >
+ firmware->size - pos - sizeof(*region)) {
adsp_err(dsp,
"%s.%d: %s region len %d bytes exceeds file length %zu\n",
file, regions, region_name,
@@ -1992,7 +1992,7 @@
blocks = 0;
while (pos < firmware->size &&
- pos - firmware->size > sizeof(*blk)) {
+ sizeof(*blk) < firmware->size - pos) {
blk = (void *)(&firmware->data[pos]);
type = le16_to_cpu(blk->type);
@@ -2066,8 +2066,8 @@
}
if (reg) {
- if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
- firmware->size) {
+ if (le32_to_cpu(blk->len) >
+ firmware->size - pos - sizeof(*blk)) {
adsp_err(dsp,
"%s.%d: %s region len %d bytes exceeds file length %zu\n",
file, blocks, region_name,
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index fde08660..1c03490 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -1467,12 +1467,6 @@
sizeof(fsl_ssi_ac97_dai));
fsl_ac97_data = ssi_private;
-
- ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
- if (ret) {
- dev_err(&pdev->dev, "could not set AC'97 ops\n");
- return ret;
- }
} else {
/* Initialize this copy of the CPU DAI driver structure */
memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1583,6 +1577,14 @@
return ret;
}
+ if (fsl_ssi_is_ac97(ssi_private)) {
+ ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "could not set AC'97 ops\n");
+ goto error_ac97_ops;
+ }
+ }
+
ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
&ssi_private->cpu_dai_drv, 1);
if (ret) {
@@ -1666,6 +1668,10 @@
fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
error_asoc_register:
+ if (fsl_ssi_is_ac97(ssi_private))
+ snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
if (ssi_private->soc->imx)
fsl_ssi_imx_clean(pdev, ssi_private);
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index f608f8d2..dd88c2c 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -232,13 +232,19 @@
snprintf(prop, sizeof(prop), "%scpu", prefix);
cpu = of_get_child_by_name(node, prop);
+ if (!cpu) {
+ ret = -EINVAL;
+ dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop);
+ goto dai_link_of_err;
+ }
+
snprintf(prop, sizeof(prop), "%splat", prefix);
plat = of_get_child_by_name(node, prop);
snprintf(prop, sizeof(prop), "%scodec", prefix);
codec = of_get_child_by_name(node, prop);
- if (!cpu || !codec) {
+ if (!codec) {
ret = -EINVAL;
dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop);
goto dai_link_of_err;
diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c
index c1610a0..3cf522d 100644
--- a/sound/soc/img/img-parallel-out.c
+++ b/sound/soc/img/img-parallel-out.c
@@ -166,9 +166,11 @@
return -EINVAL;
}
+ pm_runtime_get_sync(prl->dev);
reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL);
reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set;
img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL);
+ pm_runtime_put(prl->dev);
return 0;
}
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index 3f8e6f0..dcf0369 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -41,7 +41,8 @@
obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
if (obj && obj->type == ACPI_TYPE_BUFFER) {
nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer;
- nhlt_table = (struct nhlt_acpi_table *)
+ if (nhlt_ptr->length)
+ nhlt_table = (struct nhlt_acpi_table *)
memremap(nhlt_ptr->min_addr, nhlt_ptr->length,
MEMREMAP_WB);
ACPI_FREE(obj);
diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c
index ea162fb..d5adc04 100644
--- a/sound/soc/intel/skylake/skl-sst-utils.c
+++ b/sound/soc/intel/skylake/skl-sst-utils.c
@@ -295,6 +295,7 @@
struct uuid_module *module;
struct firmware stripped_fw;
unsigned int safe_file;
+ int ret = 0;
/* Get the FW pointer to derive ADSP header */
stripped_fw.data = fw->data;
@@ -343,8 +344,10 @@
for (i = 0; i < num_entry; i++, mod_entry++) {
module = kzalloc(sizeof(*module), GFP_KERNEL);
- if (!module)
- return -ENOMEM;
+ if (!module) {
+ ret = -ENOMEM;
+ goto free_uuid_list;
+ }
uuid_bin = (uuid_le *)mod_entry->uuid.id;
memcpy(&module->uuid, uuid_bin, sizeof(module->uuid));
@@ -355,8 +358,8 @@
size = sizeof(int) * mod_entry->instance_max_count;
module->instance_id = devm_kzalloc(ctx->dev, size, GFP_KERNEL);
if (!module->instance_id) {
- kfree(module);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto free_uuid_list;
}
list_add_tail(&module->list, &skl->uuid_list);
@@ -367,6 +370,10 @@
}
return 0;
+
+free_uuid_list:
+ skl_freeup_uuid_list(skl);
+ return ret;
}
void skl_freeup_uuid_list(struct skl_sst *ctx)
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index 974915c..08bfee4 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -476,6 +476,7 @@
case I2S_INTCR:
case I2S_XFER:
case I2S_CLR:
+ case I2S_TXDR:
case I2S_RXDR:
case I2S_FIFOLR:
case I2S_INTSR:
@@ -490,6 +491,9 @@
switch (reg) {
case I2S_INTSR:
case I2S_CLR:
+ case I2S_FIFOLR:
+ case I2S_TXDR:
+ case I2S_RXDR:
return true;
default:
return false;
@@ -499,6 +503,8 @@
static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
+ case I2S_RXDR:
+ return true;
default:
return false;
}
diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c
index abb5eaa..7d92a24 100644
--- a/sound/soc/sh/rcar/cmd.c
+++ b/sound/soc/sh/rcar/cmd.c
@@ -31,23 +31,24 @@
struct rsnd_mod *mix = rsnd_io_to_mod_mix(io);
struct device *dev = rsnd_priv_to_dev(priv);
u32 data;
+ u32 path[] = {
+ [1] = 1 << 0,
+ [5] = 1 << 8,
+ [6] = 1 << 12,
+ [9] = 1 << 15,
+ };
if (!mix && !dvc)
return 0;
+ if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1)
+ return -ENXIO;
+
if (mix) {
struct rsnd_dai *rdai;
struct rsnd_mod *src;
struct rsnd_dai_stream *tio;
int i;
- u32 path[] = {
- [0] = 0,
- [1] = 1 << 0,
- [2] = 0,
- [3] = 0,
- [4] = 0,
- [5] = 1 << 8
- };
/*
* it is assuming that integrater is well understanding about
@@ -70,16 +71,19 @@
} else {
struct rsnd_mod *src = rsnd_io_to_mod_src(io);
- u32 path[] = {
- [0] = 0x30000,
- [1] = 0x30001,
- [2] = 0x40000,
- [3] = 0x10000,
- [4] = 0x20000,
- [5] = 0x40100
+ u8 cmd_case[] = {
+ [0] = 0x3,
+ [1] = 0x3,
+ [2] = 0x4,
+ [3] = 0x1,
+ [4] = 0x2,
+ [5] = 0x4,
+ [6] = 0x1,
+ [9] = 0x2,
};
- data = path[rsnd_mod_id(src)];
+ data = path[rsnd_mod_id(src)] |
+ cmd_case[rsnd_mod_id(src)] << 16;
}
dev_dbg(dev, "ctu/mix path = 0x%08x", data);
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index 6bc93cb..edeb74a 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -361,6 +361,20 @@
return ioread32(rsnd_dmapp_addr(dmac, dma, reg));
}
+static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg)
+{
+ struct rsnd_mod *mod = rsnd_mod_get(dma);
+ struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
+ struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv);
+ volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg);
+ u32 val = ioread32(addr);
+
+ val &= ~mask;
+ val |= (data & mask);
+
+ iowrite32(val, addr);
+}
+
static int rsnd_dmapp_stop(struct rsnd_mod *mod,
struct rsnd_dai_stream *io,
struct rsnd_priv *priv)
@@ -368,10 +382,10 @@
struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
int i;
- rsnd_dmapp_write(dma, 0, PDMACHCR);
+ rsnd_dmapp_bset(dma, 0, PDMACHCR_DE, PDMACHCR);
for (i = 0; i < 1024; i++) {
- if (0 == rsnd_dmapp_read(dma, PDMACHCR))
+ if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE))
return 0;
udelay(1);
}
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index 6cb6db0..a9a43ac 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -172,10 +172,15 @@
{
struct rsnd_mod *ssi_mod = rsnd_io_to_mod_ssi(io);
struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io);
+ u32 mods;
- return rsnd_ssi_multi_slaves_runtime(io) |
- 1 << rsnd_mod_id(ssi_mod) |
- 1 << rsnd_mod_id(ssi_parent_mod);
+ mods = rsnd_ssi_multi_slaves_runtime(io) |
+ 1 << rsnd_mod_id(ssi_mod);
+
+ if (ssi_parent_mod)
+ mods |= 1 << rsnd_mod_id(ssi_parent_mod);
+
+ return mods;
}
u32 rsnd_ssi_multi_slaves_runtime(struct rsnd_dai_stream *io)
@@ -694,9 +699,14 @@
struct rsnd_priv *priv)
{
struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+ struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io);
struct device *dev = rsnd_priv_to_dev(priv);
int irq = ssi->irq;
+ /* Do nothing if non SSI (= SSI parent, multi SSI) mod */
+ if (pure_ssi_mod != mod)
+ return 0;
+
/* PIO will request IRQ again */
devm_free_irq(dev, irq, mod);
diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c
index 6f9b388..3f95d6b 100644
--- a/sound/soc/sh/rcar/ssiu.c
+++ b/sound/soc/sh/rcar/ssiu.c
@@ -44,7 +44,11 @@
mask1 = (1 << 4) | (1 << 20); /* mask sync bit */
mask2 = (1 << 4); /* mask sync bit */
val1 = val2 = 0;
- if (rsnd_ssi_is_pin_sharing(io)) {
+ if (id == 8) {
+ /*
+ * SSI8 pin is sharing with SSI7, nothing to do.
+ */
+ } else if (rsnd_ssi_is_pin_sharing(io)) {
int shift = -1;
switch (id) {
diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c
index 0e1c3ee..9735b4c 100644
--- a/sound/soc/sti/uniperif_reader.c
+++ b/sound/soc/sti/uniperif_reader.c
@@ -364,6 +364,8 @@
struct uniperif *reader = priv->dai_data.uni;
int ret;
+ reader->substream = substream;
+
if (!UNIPERIF_TYPE_IS_TDM(reader))
return 0;
@@ -393,6 +395,7 @@
/* Stop the reader */
uni_reader_stop(reader);
}
+ reader->substream = NULL;
}
static const struct snd_soc_dai_ops uni_reader_dai_ops = {
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 9133d3e..08015c1 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -204,6 +204,10 @@
int index, char *buf, int maxlen)
{
int len = usb_string(state->chip->dev, index, buf, maxlen - 1);
+
+ if (len < 0)
+ return 0;
+
buf[len] = 0;
return len;
}
@@ -2163,19 +2167,25 @@
kctl->private_value = (unsigned long)namelist;
kctl->private_free = usb_mixer_selector_elem_free;
- nameid = uac_selector_unit_iSelector(desc);
+ /* check the static mapping table at first */
len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
- if (len)
- ;
- else if (nameid)
- snd_usb_copy_string_desc(state, nameid, kctl->id.name,
- sizeof(kctl->id.name));
- else {
- len = get_term_name(state, &state->oterm,
+ if (!len) {
+ /* no mapping ? */
+ /* if iSelector is given, use it */
+ nameid = uac_selector_unit_iSelector(desc);
+ if (nameid)
+ len = snd_usb_copy_string_desc(state, nameid,
+ kctl->id.name,
+ sizeof(kctl->id.name));
+ /* ... or pick up the terminal name at next */
+ if (!len)
+ len = get_term_name(state, &state->oterm,
kctl->id.name, sizeof(kctl->id.name), 0);
+ /* ... or use the fixed string "USB" as the last resort */
if (!len)
strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+ /* and add the proper suffix */
if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
append_ctl_name(kctl, " Clock Source");
else if ((state->oterm.type & 0xff00) == 0x0100)
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 7613b9e..1cd7f8b 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1170,10 +1170,11 @@
/* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
* between PCM/DOP and native DSD mode
*/
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
{
switch (id) {
case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+ case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
return true;
}
return false;
@@ -1206,7 +1207,7 @@
break;
}
mdelay(20);
- } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+ } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
/* Vendor mode switch cmd is required. */
switch (fmt->altsetting) {
case 3: /* DSD mode (DSD_U32) requested */
@@ -1376,7 +1377,7 @@
}
/* TEAC devices with USB DAC functionality */
- if (is_teac_50X_dac(chip->usb_id)) {
+ if (is_teac_dsd_dac(chip->usb_id)) {
if (fp->altsetting == 3)
return SNDRV_PCM_FMTBIT_DSD_U32_BE;
}
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index a396292..f79669a 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,9 @@
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+#define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1c14c25..4b36323 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,6 +23,7 @@
#include <getopt.h>
#include <inttypes.h>
#include <sys/ioctl.h>
+#include <sys/types.h>
#include <linux/gpio.h>
int monitor_device(const char *device_name,
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index bc7adb8..60a94b3 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -193,11 +193,14 @@
for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
- ENTRIES_PER_BLOCK * num_blocks,
- filep);
+ ENTRIES_PER_BLOCK * num_blocks - records_read,
+ filep);
if (ferror(filep)) {
- syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+ syslog(LOG_ERR,
+ "Failed to read file, pool: %d; error: %d %s",
+ pool, errno, strerror(errno));
+ kvp_release_lock(pool);
exit(EXIT_FAILURE);
}
@@ -210,6 +213,7 @@
if (record == NULL) {
syslog(LOG_ERR, "malloc failed");
+ kvp_release_lock(pool);
exit(EXIT_FAILURE);
}
continue;
@@ -224,15 +228,11 @@
fclose(filep);
kvp_release_lock(pool);
}
+
static int kvp_file_init(void)
{
int fd;
- FILE *filep;
- size_t records_read;
char *fname;
- struct kvp_record *record;
- struct kvp_record *readp;
- int num_blocks;
int i;
int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
@@ -246,61 +246,19 @@
for (i = 0; i < KVP_POOL_COUNT; i++) {
fname = kvp_file_info[i].fname;
- records_read = 0;
- num_blocks = 1;
sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */);
if (fd == -1)
return 1;
-
- filep = fopen(fname, "re");
- if (!filep) {
- close(fd);
- return 1;
- }
-
- record = malloc(alloc_unit * num_blocks);
- if (record == NULL) {
- fclose(filep);
- close(fd);
- return 1;
- }
- for (;;) {
- readp = &record[records_read];
- records_read += fread(readp, sizeof(struct kvp_record),
- ENTRIES_PER_BLOCK,
- filep);
-
- if (ferror(filep)) {
- syslog(LOG_ERR, "Failed to read file, pool: %d",
- i);
- exit(EXIT_FAILURE);
- }
-
- if (!feof(filep)) {
- /*
- * We have more data to read.
- */
- num_blocks++;
- record = realloc(record, alloc_unit *
- num_blocks);
- if (record == NULL) {
- fclose(filep);
- close(fd);
- return 1;
- }
- continue;
- }
- break;
- }
kvp_file_info[i].fd = fd;
- kvp_file_info[i].num_blocks = num_blocks;
- kvp_file_info[i].records = record;
- kvp_file_info[i].num_records = records_read;
- fclose(filep);
-
+ kvp_file_info[i].num_blocks = 1;
+ kvp_file_info[i].records = malloc(alloc_unit);
+ if (kvp_file_info[i].records == NULL)
+ return 1;
+ kvp_file_info[i].num_records = 0;
+ kvp_update_mem_state(i);
}
return 0;
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 51334ed..f306a76 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -14,6 +14,10 @@
# define POISON_POINTER_DELTA 0
#endif
+#ifdef __cplusplus
+#define LIST_POISON1 NULL
+#define LIST_POISON2 NULL
+#else
/*
* These are non-NULL pointers that will result in page faults
* under normal circumstances, used to verify that nobody uses
@@ -21,6 +25,7 @@
*/
#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
+#endif
/********** include/linux/timer.h **********/
/*
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
index 767be7c..1754e09 100644
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -896,7 +896,7 @@
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index b8dadb0..a688a85 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -51,7 +51,7 @@
unsigned int len, state;
unsigned char type;
unsigned long immediate;
- bool alt_group, visited;
+ bool alt_group, visited, ignore_alts;
struct symbol *call_dest;
struct instruction *jump_dest;
struct list_head alts;
@@ -353,6 +353,40 @@
}
/*
+ * FIXME: For now, just ignore any alternatives which add retpolines. This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+ struct section *sec;
+ struct rela *rela;
+ struct instruction *insn;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.nospec entry");
+ return -1;
+ }
+
+ insn->ignore_alts = true;
+ }
+
+ return 0;
+}
+
+/*
* Find the destination instructions for all jumps.
*/
static int add_jump_destinations(struct objtool_file *file)
@@ -382,6 +416,13 @@
} else if (rela->sym->sec->idx) {
dest_sec = rela->sym->sec;
dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
+ */
+ insn->type = INSN_JUMP_DYNAMIC;
+ continue;
} else {
/* sibling call */
insn->jump_dest = 0;
@@ -428,11 +469,18 @@
dest_off = insn->offset + insn->len + insn->immediate;
insn->call_dest = find_symbol_by_offset(insn->sec,
dest_off);
+ /*
+ * FIXME: Thanks to retpolines, it's now considered
+ * normal for a function to call within itself. So
+ * disable this warning for now.
+ */
+#if 0
if (!insn->call_dest) {
WARN_FUNC("can't find call dest symbol at offset 0x%lx",
insn->sec, insn->offset, dest_off);
return -1;
}
+#endif
} else if (rela->sym->type == STT_SECTION) {
insn->call_dest = find_symbol_by_offset(rela->sym->sec,
rela->addend+4);
@@ -594,12 +642,6 @@
return ret;
list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
- alt = malloc(sizeof(*alt));
- if (!alt) {
- WARN("malloc failed");
- ret = -1;
- goto out;
- }
orig_insn = find_insn(file, special_alt->orig_sec,
special_alt->orig_off);
@@ -610,6 +652,10 @@
goto out;
}
+ /* Ignore retpoline alternatives. */
+ if (orig_insn->ignore_alts)
+ continue;
+
new_insn = NULL;
if (!special_alt->group || special_alt->new_len) {
new_insn = find_insn(file, special_alt->new_sec,
@@ -635,6 +681,13 @@
goto out;
}
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ ret = -1;
+ goto out;
+ }
+
alt->insn = new_insn;
list_add_tail(&alt->list, &orig_insn->alts);
@@ -854,6 +907,10 @@
add_ignores(file);
+ ret = add_nospec_ignores(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1173,6 +1230,14 @@
for_each_insn(file, insn) {
if (!insn->visited && insn->type == INSN_RETURN) {
+
+ /*
+ * Don't warn about call instructions in unvisited
+ * retpoline alternatives.
+ */
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
WARN_FUNC("return instruction outside of a callable function",
insn->sec, insn->offset);
warnings++;
@@ -1229,7 +1294,7 @@
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
- file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
+ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
file.rodata = find_section_by_name(file.elf, ".rodata");
file.ignore_unreachables = false;
file.c_file = find_section_by_name(file.elf, ".comment");
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d897702..faacf0c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -26,6 +26,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <errno.h>
#include "elf.h"
#include "warn.h"
@@ -370,7 +371,8 @@
elf->fd = open(name, O_RDONLY);
if (elf->fd == -1) {
- perror("open");
+ fprintf(stderr, "objtool: Can't open '%s': %s\n",
+ name, strerror(errno));
goto err;
}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index cffdd9c..ff37531 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -19,18 +19,18 @@
include $(srctree)/tools/scripts/Makefile.arch
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
NO_PERF_REGS := 1
# Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
# Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@
NO_PERF_REGS := 0
endif
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
@@ -61,7 +61,7 @@
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif
@@ -115,9 +115,9 @@
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -205,12 +205,12 @@
endif
CFLAGS += -I$(src-perf)/util/include
-CFLAGS += -I$(src-perf)/arch/$(ARCH)/include
+CFLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
CFLAGS += -I$(srctree)/tools/include/uapi
CFLAGS += -I$(srctree)/tools/include/
-CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-CFLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+CFLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
# $(obj-perf) for generated common-cmds.h
# $(obj-perf)/util for generated bison/flex headers
@@ -321,7 +321,7 @@
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
- msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+ msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
NO_DWARF := 1
else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -346,7 +346,7 @@
CFLAGS += -DHAVE_BPF_PROLOGUE
$(call detected,CONFIG_BPF_PROLOGUE)
else
- msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+ msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
endif
else
msg := $(warning DWARF support is off, BPF prologue is disabled);
@@ -372,7 +372,7 @@
endif
endif
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
ifndef NO_DWARF
CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
endif
@@ -453,7 +453,7 @@
endif
ifndef NO_LOCAL_LIBUNWIND
- ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
msg := $(warning No debug_frame support found in libunwind);
@@ -717,7 +717,7 @@
NO_PERF_READ_VDSO32 := 1
endif
endif
- ifneq ($(ARCH), x86)
+ ifneq ($(SRCARCH), x86)
NO_PERF_READ_VDSOX32 := 1
endif
ifndef NO_PERF_READ_VDSOX32
@@ -746,7 +746,7 @@
endif
ifndef NO_AUXTRACE
- ifeq ($(ARCH),x86)
+ ifeq ($(SRCARCH),x86)
ifeq ($(feature-get_cpuid), 0)
msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
NO_AUXTRACE := 1
@@ -793,7 +793,7 @@
ETC_PERFCONFIG = etc/perfconfig
endif
ifndef lib
-ifeq ($(ARCH)$(IS_64_BIT), x861)
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
lib = lib64
else
lib = lib
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index ef52d1e..2b92ffe 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -192,7 +192,7 @@
ifeq ($(config),0)
include $(srctree)/tools/scripts/Makefile.arch
--include arch/$(ARCH)/Makefile
+-include arch/$(SRCARCH)/Makefile
endif
# The FEATURE_DUMP_EXPORT holds location of the actual
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
index 109eb75..d9b6af8 100644
--- a/tools/perf/arch/Build
+++ b/tools/perf/arch/Build
@@ -1,2 +1,2 @@
libperf-y += common.o
-libperf-y += $(ARCH)/
+libperf-y += $(SRCARCH)/
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 9213a12..999a4e8 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -2,7 +2,7 @@
jevents-y += json.o jsmn.o jevents.o
pmu-events-y += pmu-events.o
-JDIR = pmu-events/arch/$(ARCH)
+JDIR = pmu-events/arch/$(SRCARCH)
JSON = $(shell [ -d $(JDIR) ] && \
find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
#
@@ -10,4 +10,4 @@
# directory and create tables in pmu-events.c.
#
$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
- $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+ $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 8a4ce49..546250a 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -71,7 +71,7 @@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 28d1605..b60a6fd 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -150,7 +150,7 @@
snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
d, d, perf, vcnt, v);
- return system(cmd);
+ return system(cmd) ? TEST_FAIL : TEST_OK;
}
int test__attr(int subtest __maybe_unused)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 5337f49..28bdb48 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -826,7 +826,7 @@
/*
* default get_cpuid(): nothing gets recorded
- * actual implementation must be in arch/$(ARCH)/util/header.c
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
*/
int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
{
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f7b35e1..f199d5b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,7 +202,7 @@
/* Last entry */
if (curr->end == curr->start)
- curr->end = roundup(curr->start, 4096);
+ curr->end = roundup(curr->start, 4096) + 4096;
}
void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c
index c25a74a..2bb3eef 100644
--- a/tools/power/cpupower/bench/system.c
+++ b/tools/power/cpupower/bench/system.c
@@ -61,7 +61,7 @@
dprintf("set %s as cpufreq governor\n", governor);
- if (cpupower_is_cpu_online(cpu) != 0) {
+ if (cpupower_is_cpu_online(cpu) != 1) {
perror("cpufreq_cpu_exists");
fprintf(stderr, "error: cpu %u does not exist\n", cpu);
return -1;
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 1b5da00..5b3205f 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -130,15 +130,18 @@
{
int num;
char *tmp;
+ int this_cpu;
+
+ this_cpu = sched_getcpu();
/* Assume idle state count is the same for all CPUs */
- cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0);
+ cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu);
if (cpuidle_sysfs_monitor.hw_states_num <= 0)
return NULL;
for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
- tmp = cpuidle_state_name(0, num);
+ tmp = cpuidle_state_name(this_cpu, num);
if (tmp == NULL)
continue;
@@ -146,7 +149,7 @@
strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
free(tmp);
- tmp = cpuidle_state_desc(0, num);
+ tmp = cpuidle_state_desc(this_cpu, num);
if (tmp == NULL)
continue;
strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 248a820..66d31de 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -114,9 +114,11 @@
rc = run_test(test_function, name);
- if (rc == MAGIC_SKIP_RETURN_VALUE)
+ if (rc == MAGIC_SKIP_RETURN_VALUE) {
test_skip(name);
- else
+ /* so that skipped test is not marked as failed */
+ rc = 0;
+ } else
test_finish(name, rc);
return rc;
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index bbab7f4..d116a19 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,9 @@
# Makefile for vm selftests
+ifndef OUTPUT
+ OUTPUT := $(shell pwd)
+endif
+
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
BINARIES = compaction_test
BINARIES += hugepage-mmap
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 6300c1a..4af37bf 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,7 +6,7 @@
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
- protection_keys
+ protection_keys test_vsyscall
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 9b4610c..f249e04 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@
long ret;
asm volatile ("int $0x80"
: "=a" (ret) : "a" (243), "b" (low_desc)
- : "flags");
+ : "r8", "r9", "r10", "r11");
memcpy(&desc, low_desc, sizeof(desc));
munmap(low_desc, sizeof(desc));
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index e717fed..ac1a7a3 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
#define AR_DB (1 << 22)
#define AR_G (1 << 23)
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
static int nerrs;
/* Points to an array of 1024 ints, each holding its own index. */
@@ -360,9 +366,24 @@
install_invalid(&desc, false);
desc.seg_not_present = 0;
- desc.read_exec_only = 0;
desc.seg_32bit = 1;
+ desc.read_exec_only = 0;
+ desc.limit = 0xfffff;
+
install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.contents = 1;
+ desc.read_exec_only = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+ desc.limit = 0;
install_invalid(&desc, true);
}
@@ -634,7 +655,7 @@
asm volatile ("int $0x80"
: "=a" (ret), "+m" (low_user_desc) :
"a" (243), "b" (low_user_desc)
- : "flags");
+ : INT80_CLOBBERS);
return ret;
}
@@ -703,7 +724,7 @@
"+a" (eax)
: "m" (low_user_desc_clear),
[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
- : "flags");
+ : INT80_CLOBBERS);
if (sel != 0) {
result = "FAIL";
@@ -734,7 +755,7 @@
"+a" (eax)
: "m" (low_user_desc_clear),
[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
- : "flags");
+ : INT80_CLOBBERS);
if (sel != 0) {
result = "FAIL";
@@ -767,7 +788,7 @@
"+a" (eax)
: "m" (low_user_desc_clear),
[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
- : "flags");
+ : INT80_CLOBBERS);
#ifdef __x86_64__
syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -820,7 +841,7 @@
"+a" (eax)
: "m" (low_user_desc_clear),
[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
- : "flags");
+ : INT80_CLOBBERS);
#ifdef __x86_64__
syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
index 093c190..28b3c7c 100644
--- a/tools/testing/selftests/x86/mpx-hw.h
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -51,14 +51,14 @@
struct mpx_bd_entry {
union {
char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
- void *contents[1];
+ void *contents[0];
};
} __attribute__((packed));
struct mpx_bt_entry {
union {
char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
- unsigned long contents[1];
+ unsigned long contents[0];
};
} __attribute__((packed));
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index b037ce9c..eaea924 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@
asm volatile ("int $0x80"
: "+a" (args->nr),
"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
- "+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+ "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+ : : "r8", "r9", "r10", "r11");
args->arg5 = bp;
#else
sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50c2635..a48da95 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@
#ifdef __x86_64__
# define REG_IP REG_RIP
# define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
#else
# define REG_IP REG_EIP
# define WIDTH "l"
+# define INT80_CLOBBERS
#endif
static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@
printf("[RUN]\tSet TF and check int80\n");
set_eflags(get_eflags() | X86_EFLAGS_TF);
- asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+ asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+ : INT80_CLOBBERS);
check_result();
/*
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 0000000..6e0bd52
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[128];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ char r, x;
+ void *start, *end;
+ char name[128];
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking for native vsyscall\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+ * In native mode, we expect two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_native_vsyscall();
+#endif
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/usb/usbip/Makefile.am b/tools/usb/usbip/Makefile.am
index 66f8bf0..45eaa70 100644
--- a/tools/usb/usbip/Makefile.am
+++ b/tools/usb/usbip/Makefile.am
@@ -1,6 +1,7 @@
SUBDIRS := libsrc src
includedir = @includedir@/usbip
include_HEADERS := $(addprefix libsrc/, \
- usbip_common.h vhci_driver.h usbip_host_driver.h)
+ usbip_common.h vhci_driver.h usbip_host_driver.h \
+ list.h sysfs_utils.h usbip_host_common.h)
dist_man_MANS := $(addprefix doc/, usbip.8 usbipd.8)
diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c
index ac73710..1517a23 100644
--- a/tools/usb/usbip/libsrc/usbip_common.c
+++ b/tools/usb/usbip/libsrc/usbip_common.c
@@ -215,9 +215,16 @@
struct usbip_usb_interface *uinf)
{
char busid[SYSFS_BUS_ID_SIZE];
+ int size;
struct udev_device *sif;
- sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i);
+ size = snprintf(busid, sizeof(busid), "%s:%d.%d",
+ udev->busid, udev->bConfigurationValue, i);
+ if (size < 0 || (unsigned int)size >= sizeof(busid)) {
+ err("busid length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(busid));
+ return -1;
+ }
sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid);
if (!sif) {
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
index 9d41522..6ff7b60 100644
--- a/tools/usb/usbip/libsrc/usbip_host_common.c
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -40,13 +40,20 @@
static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
{
char status_attr_path[SYSFS_PATH_MAX];
+ int size;
int fd;
int length;
char status;
int value = 0;
- snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status",
- udev->path);
+ size = snprintf(status_attr_path, sizeof(status_attr_path),
+ "%s/usbip_status", udev->path);
+ if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) {
+ err("usbip_status path length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(status_attr_path));
+ return -1;
+ }
+
fd = open(status_attr_path, O_RDONLY);
if (fd < 0) {
@@ -218,6 +225,7 @@
{
char attr_name[] = "usbip_sockfd";
char sockfd_attr_path[SYSFS_PATH_MAX];
+ int size;
char sockfd_buff[30];
int ret;
@@ -237,10 +245,20 @@
}
/* only the first interface is true */
- snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
- edev->udev.path, attr_name);
+ size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
+ edev->udev.path, attr_name);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) {
+ err("exported device path length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(sockfd_attr_path));
+ return -1;
+ }
- snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
+ size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) {
+ err("socket length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(sockfd_buff));
+ return -1;
+ }
ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff,
strlen(sockfd_buff));
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index ad92047..1274f32 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -55,12 +55,12 @@
while (*c != '\0') {
int port, status, speed, devid;
- unsigned long socket;
+ int sockfd;
char lbusid[SYSFS_BUS_ID_SIZE];
- ret = sscanf(c, "%d %d %d %x %lx %31s\n",
+ ret = sscanf(c, "%d %d %d %x %u %31s\n",
&port, &status, &speed,
- &devid, &socket, lbusid);
+ &devid, &sockfd, lbusid);
if (ret < 5) {
dbg("sscanf failed: %d", ret);
@@ -69,7 +69,7 @@
dbg("port %d status %d speed %d devid %x",
port, status, speed, devid);
- dbg("socket %lx lbusid %s", socket, lbusid);
+ dbg("sockfd %u lbusid %s", sockfd, lbusid);
/* if a device is connected, look at it */
diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c
index d7599d9..73d8eee 100644
--- a/tools/usb/usbip/src/usbip.c
+++ b/tools/usb/usbip/src/usbip.c
@@ -176,6 +176,8 @@
break;
case '?':
printf("usbip: invalid option\n");
+ /* Terminate after printing error */
+ /* FALLTHRU */
default:
usbip_usage();
goto out;
diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c
index fa46141..e121cfb 100644
--- a/tools/usb/usbip/src/usbip_bind.c
+++ b/tools/usb/usbip/src/usbip_bind.c
@@ -144,6 +144,7 @@
int rc;
struct udev *udev;
struct udev_device *dev;
+ const char *devpath;
/* Check whether the device with this bus ID exists. */
udev = udev_new();
@@ -152,8 +153,16 @@
err("device with the specified bus ID does not exist");
return -1;
}
+ devpath = udev_device_get_devpath(dev);
udev_unref(udev);
+ /* If the device is already attached to vhci_hcd - bail out */
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ err("bind loop detected: device: %s is attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ return -1;
+ }
+
rc = unbind_other(busid);
if (rc == UNBIND_ST_FAILED) {
err("could not unbind driver from device on busid %s", busid);
diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
index f1b38e8..d65a9f4 100644
--- a/tools/usb/usbip/src/usbip_list.c
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -187,6 +187,7 @@
const char *busid;
char product_name[128];
int ret = -1;
+ const char *devpath;
/* Create libudev context. */
udev = udev_new();
@@ -209,6 +210,14 @@
path = udev_list_entry_get_name(dev_list_entry);
dev = udev_device_new_from_syspath(udev, path);
+ /* Ignore devices attached to vhci_hcd */
+ devpath = udev_device_get_devpath(dev);
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ dbg("Skip the device %s already attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ continue;
+ }
+
/* Get device information. */
idVendor = udev_device_get_sysattr_value(dev, "idVendor");
idProduct = udev_device_get_sysattr_value(dev, "idProduct");
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
index 2b3d6d2..3d7b42e 100644
--- a/tools/usb/usbip/src/utils.c
+++ b/tools/usb/usbip/src/utils.c
@@ -30,6 +30,7 @@
char command[SYSFS_BUS_ID_SIZE + 4];
char match_busid_attr_path[SYSFS_PATH_MAX];
int rc;
+ int cmd_size;
snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
"%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@
attr_name);
if (add)
- snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+ busid);
else
- snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+ busid);
rc = write_sysfs_attribute(match_busid_attr_path, command,
- sizeof(command));
+ cmd_size);
if (rc < 0) {
dbg("failed to write match_busid: %s", strerror(errno));
return -1;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 27a1f63..7b49a13 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -89,9 +89,6 @@
struct kvm_vcpu *vcpu;
vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
- vcpu->arch.timer_cpu.armed = false;
-
- WARN_ON(!kvm_timer_should_fire(vcpu));
/*
* If the vcpu is blocked we want to wake it up so that it will see
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index c8aeb7b..9502124 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -77,11 +77,7 @@
else
elrsr1 = 0;
-#ifdef CONFIG_CPU_BIG_ENDIAN
- cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
-#else
cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-#endif
}
static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
index f138ed2..a26c677 100644
--- a/virt/kvm/arm/vgic/vgic-irqfd.c
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -112,8 +112,7 @@
u32 nr = dist->nr_spis;
int i, ret;
- entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
- GFP_KERNEL);
+ entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
if (!entries)
return -ENOMEM;
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d..31f5625 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -322,6 +322,7 @@
int ret = 0;
u32 *intids;
int nr_irqs, i;
+ u8 pendmask;
nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
if (nr_irqs < 0)
@@ -329,7 +330,6 @@
for (i = 0; i < nr_irqs; i++) {
int byte_offset, bit_nr;
- u8 pendmask;
byte_offset = intids[i] / BITS_PER_BYTE;
bit_nr = intids[i] % BITS_PER_BYTE;
@@ -360,29 +360,6 @@
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
@@ -687,6 +664,8 @@
return E_ITS_MAPC_COLLECTION_OOR;
collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+ if (!collection)
+ return -ENOMEM;
collection->collection_id = coll_id;
collection->target_addr = COLLECTION_NOT_MAPPED;
@@ -1160,33 +1139,16 @@
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
- return;
- }
-
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
@@ -1206,6 +1168,34 @@
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
@@ -1286,6 +1276,39 @@
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f4c6d4f..1b20768 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -125,6 +125,11 @@
static bool largepages_enabled = true;
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
@@ -361,6 +366,9 @@
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
+
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -1052,7 +1060,7 @@
* changes) is disallowed above, so any other attribute changes getting
* here can be skipped.
*/
- if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+ if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
return r;
}
@@ -3896,7 +3904,7 @@
if (!vcpu_align)
vcpu_align = __alignof__(struct kvm_vcpu);
kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
- 0, NULL);
+ SLAB_ACCOUNT, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
goto out_free_3;