Merge branch 'x86/amd-avic' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu into HEAD
Merge IOMMU bits for virtualization of interrupt injection into
virtual machines.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 4da60b4..ccc6032 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,6 +53,7 @@
| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
| ARM | Cortex-A57 | #852523 | N/A |
| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
+| ARM | Cortex-A72 | #853709 | N/A |
| ARM | MMU-500 | #841119,#826419 | N/A |
| | | | |
| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index d515d58..2a39040 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -14,6 +14,12 @@
This file allows to turn off the disk entropy contribution. Default
value of this file is '1'(on).
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache. It shows '1'
+if true, '0' if not.
+
discard_granularity (RO)
-----------------------
This shows the size of internal allocation of the device in bytes, if
@@ -46,6 +52,12 @@
-------------------
This is the hardware sector size of the device, in bytes.
+io_poll (RW)
+------------
+When read, this file shows the total number of block IO polls and how
+many returned success. Writing '0' to this file will disable polling
+for this device. Writing any non-zero value will enable this feature.
+
iostats (RW)
-------------
This file is used to control (on/off) the iostats accounting of the
@@ -151,5 +163,11 @@
setting from "write back" to "write through", since that will also
eliminate cache flushes issued by the kernel.
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command. A value of '0' means write-same is not supported by this
+device.
+
Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 16a924c..70c926a 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -790,13 +790,12 @@
Data messages can have their contents extracted with the usual bunch of
socket buffer manipulation functions. A data message can be determined to
be the last one in a sequence with rxrpc_kernel_is_data_last(). When a
- data message has been used up, rxrpc_kernel_data_delivered() should be
- called on it..
+ data message has been used up, rxrpc_kernel_data_consumed() should be
+ called on it.
- Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
- of. It is possible to get extra refs on all types of message for later
- freeing, but this may pin the state of a call until the message is finally
- freed.
+ Messages should be handed to rxrpc_kernel_free_skb() to dispose of. It
+ is possible to get extra refs on all types of message for later freeing,
+ but this may pin the state of a call until the message is finally freed.
(*) Accept an incoming call.
@@ -821,12 +820,14 @@
Other errors may be returned if the call had been aborted (-ECONNABORTED)
or had timed out (-ETIME).
- (*) Record the delivery of a data message and free it.
+ (*) Record the delivery of a data message.
- void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+ void rxrpc_kernel_data_consumed(struct rxrpc_call *call,
+ struct sk_buff *skb);
- This is used to record a data message as having been delivered and to
- update the ACK state for the call. The socket buffer will be freed.
+ This is used to record a data message as having been consumed and to
+ update the ACK state for the call. The message must still be passed to
+ rxrpc_kernel_free_skb() for disposal by the caller.
(*) Free a message.
diff --git a/MAINTAINERS b/MAINTAINERS
index 20bb1d0..a306795 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1004,6 +1004,7 @@
ARM/Annapurna Labs ALPINE ARCHITECTURE
M: Tsahee Zidenberg <tsahee@annapurnalabs.com>
M: Antoine Tenart <antoine.tenart@free-electrons.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-alpine/
F: arch/arm/boot/dts/alpine*
diff --git a/Makefile b/Makefile
index 70de144..5c18baa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 8
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Psychotic Stoned Sheep
# *DOCUMENTATION*
@@ -635,13 +635,6 @@
# Tell gcc to never replace conditional load with a non-conditional one
KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
-PHONY += gcc-plugins
-gcc-plugins: scripts_basic
-ifdef CONFIG_GCC_PLUGINS
- $(Q)$(MAKE) $(build)=scripts/gcc-plugins
-endif
- @:
-
include scripts/Makefile.gcc-plugins
ifdef CONFIG_READABLE_ASM
diff --git a/arch/Kconfig b/arch/Kconfig
index bd8056b..e9c9334 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -461,6 +461,15 @@
endchoice
+config HAVE_ARCH_WITHIN_STACK_FRAMES
+ bool
+ help
+ An architecture should select this if it can walk the kernel stack
+ frames to determine if an object is part of either the arguments
+ or local variables (i.e. that it excludes saved return addresses,
+ and similar) by implementing an inline arch_within_stack_frames(),
+ which is used by CONFIG_HARDENED_USERCOPY.
+
config HAVE_CONTEXT_TRACKING
bool
help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2d601d7..a9c4e48 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 56ea5c60b..61f6ccc 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -260,12 +260,14 @@
platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
ifeq ($(KBUILD_SRC),)
KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
else
KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
endif
endif
+endif
export TEXT_OFFSET GZFLAGS MMUEXT
diff --git a/arch/arm/boot/dts/arm-realview-pbx-a9.dts b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
index db808f9..90d00b4 100644
--- a/arch/arm/boot/dts/arm-realview-pbx-a9.dts
+++ b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
@@ -70,13 +70,12 @@
* associativity as these may be erroneously set
* up by boot loader(s).
*/
- cache-size = <1048576>; // 1MB
- cache-sets = <4096>;
+ cache-size = <131072>; // 128KB
+ cache-sets = <512>;
cache-line-size = <32>;
arm,parity-disable;
- arm,tag-latency = <1>;
- arm,data-latency = <1 1>;
- arm,dirty-latency = <1>;
+ arm,tag-latency = <1 1 1>;
+ arm,data-latency = <1 1 1>;
};
scu: scu@1f000000 {
diff --git a/arch/arm/boot/dts/integratorap.dts b/arch/arm/boot/dts/integratorap.dts
index cf06e32..4b34b54 100644
--- a/arch/arm/boot/dts/integratorap.dts
+++ b/arch/arm/boot/dts/integratorap.dts
@@ -42,7 +42,7 @@
};
syscon {
- compatible = "arm,integrator-ap-syscon";
+ compatible = "arm,integrator-ap-syscon", "syscon";
reg = <0x11000000 0x100>;
interrupt-parent = <&pic>;
/* These are the logical module IRQs */
diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts
index d43f15b..79430fb 100644
--- a/arch/arm/boot/dts/integratorcp.dts
+++ b/arch/arm/boot/dts/integratorcp.dts
@@ -94,7 +94,7 @@
};
syscon {
- compatible = "arm,integrator-cp-syscon";
+ compatible = "arm,integrator-cp-syscon", "syscon";
reg = <0xcb000000 0x100>;
};
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index 00cb314..e23f46d 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -70,14 +70,6 @@
cpu_on = <0x84000003>;
};
- psci {
- compatible = "arm,psci";
- method = "smc";
- cpu_suspend = <0x84000001>;
- cpu_off = <0x84000002>;
- cpu_on = <0x84000003>;
- };
-
soc {
#address-cells = <1>;
#size-cells = <1>;
diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
index e52b824..6403e0d 100644
--- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
@@ -1382,7 +1382,7 @@
* Pin 41: BR_UART1_TXD
* Pin 44: BR_UART1_RXD
*/
- serial@70006000 {
+ serial@0,70006000 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};
@@ -1394,7 +1394,7 @@
* Pin 71: UART2_CTS_L
* Pin 74: UART2_RTS_L
*/
- serial@70006040 {
+ serial@0,70006040 {
compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
status = "okay";
};
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index b6e54ee..ca39c04 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -58,7 +58,7 @@
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 8926051..4f366b0 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -59,7 +59,7 @@
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_FIRMWARE_MEMMAP=y
CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 62a6f65..a93c0f9 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -480,7 +480,10 @@
static inline unsigned long __must_check
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(to, n, false);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_from_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
@@ -495,11 +498,15 @@
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
#ifndef CONFIG_UACCESS_WITH_MEMCPY
- unsigned int __ua_flags = uaccess_save_and_enable();
+ unsigned int __ua_flags;
+
+ check_object_size(from, n, true);
+ __ua_flags = uaccess_save_and_enable();
n = arm_copy_to_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
#else
+ check_object_size(from, n, true);
return arm_copy_to_user(to, from, n);
#endif
}
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 087acb5..5f221ac 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@
mm_segment_t fs;
long ret, err, i;
- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+ if (maxevents <= 0 ||
+ maxevents > (INT_MAX/sizeof(*kbuf)) ||
+ maxevents > (INT_MAX/sizeof(*events)))
return -EINVAL;
+ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+ return -EFAULT;
kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
@@ -317,6 +321,8 @@
if (nsops < 1 || nsops > SEMOPM)
return -EINVAL;
+ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+ return -EFAULT;
sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
if (!sops)
return -ENOMEM;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d94bb90..75f130e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1009,9 +1009,13 @@
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
+ int ret;
if (!vgic_present)
return -ENXIO;
- return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+ mutex_lock(&kvm->lock);
+ ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+ mutex_unlock(&kvm->lock);
+ return ret;
}
case KVM_ARM_SET_DEVICE_ADDR: {
struct kvm_arm_device_addr dev_addr;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bda27b6..29d0b23 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1309,7 +1309,7 @@
smp_rmb();
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
- if (is_error_pfn(pfn))
+ if (is_error_noslot_pfn(pfn))
return -EFAULT;
if (kvm_is_device_pfn(pfn)) {
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig
index dc7c6ed..61284b9 100644
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -1,13 +1,13 @@
menuconfig ARCH_CLPS711X
bool "Cirrus Logic EP721x/EP731x-based"
depends on ARCH_MULTI_V4T
- select ARCH_REQUIRE_GPIOLIB
select AUTO_ZRELADDR
select CLKSRC_OF
select CLPS711X_TIMER
select COMMON_CLK
select CPU_ARM720T
select GENERIC_CLOCKEVENTS
+ select GPIOLIB
select MFD_SYSCON
select OF_IRQ
select USE_OF
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index e53c6cf..6c6497e 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -1,5 +1,4 @@
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
- -I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
AFLAGS_coherency_ll.o := -Wa,-march=armv7-a
CFLAGS_pmsu.o := -march=armv7-a
diff --git a/arch/arm/mach-oxnas/Kconfig b/arch/arm/mach-oxnas/Kconfig
index 567496b..29100be 100644
--- a/arch/arm/mach-oxnas/Kconfig
+++ b/arch/arm/mach-oxnas/Kconfig
@@ -11,11 +11,13 @@
config MACH_OX810SE
bool "Support OX810SE Based Products"
+ select ARCH_HAS_RESET_CONTROLLER
select COMMON_CLK_OXNAS
select CPU_ARM926T
select MFD_SYSCON
select OXNAS_RPS_TIMER
select PINCTRL_OXNAS
+ select RESET_CONTROLLER
select RESET_OXNAS
select VERSATILE_FPGA_IRQ
help
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index dc109dc3..10bfdb1 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -13,6 +13,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/major.h>
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 1080580..2c150bf 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -13,6 +13,7 @@
*/
#include <linux/kernel.h>
+#include <linux/module.h> /* symbol_get ; symbol_put */
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/gpio_keys.h>
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index dae8d86..4048821 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -1,8 +1,7 @@
#
# Makefile for the linux kernel.
#
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
- -I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
obj-y := core.o
obj-$(CONFIG_REALVIEW_DT) += realview-dt.o
diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile
index 72b9e96..fa7fb71 100644
--- a/arch/arm/mach-s5pv210/Makefile
+++ b/arch/arm/mach-s5pv210/Makefile
@@ -5,7 +5,7 @@
#
# Licensed under GPLv2
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
# Core
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index f3dba6f..02e21bc 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -40,5 +40,8 @@
bool __init shmobile_smp_init_fallback_ops(void)
{
/* fallback on PSCI/smp_ops if no other DT based method is detected */
+ if (!IS_ENABLED(CONFIG_SMP))
+ return false;
+
return platform_can_secondary_boot() ? true : false;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 69c8787..bc3f00f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -54,6 +54,7 @@
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index bb2616b..be5d824 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -8,7 +8,7 @@
config ARCH_ALPINE
bool "Annapurna Labs Alpine platform"
- select ALPINE_MSI
+ select ALPINE_MSI if PCI
help
This enables support for the Annapurna Labs Alpine
Soc family.
@@ -66,7 +66,7 @@
config ARCH_HISI
bool "Hisilicon SoC Family"
select ARM_TIMER_SP804
- select HISILICON_IRQ_MBIGEN
+ select HISILICON_IRQ_MBIGEN if PCI
help
This enables support for Hisilicon ARMv8 SoC family
diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
index 299f3ce..c528dd5 100644
--- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
+++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
@@ -12,6 +12,7 @@
/dts-v1/;
#include "exynos7.dtsi"
#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
/ {
model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@@ -43,6 +44,8 @@
&rtc {
status = "okay";
+ clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+ clock-names = "rtc", "rtc_src";
};
&watchdog {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0555b7c..eadf485 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
@@ -15,10 +14,14 @@
CONFIG_LOG_BUF_SHIFT=14
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_HUGETLB=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_NET_NS is not set
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
@@ -71,6 +74,7 @@
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
+CONFIG_SECCOMP=y
CONFIG_XEN=y
CONFIG_KEXEC=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -84,10 +88,37 @@
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
CONFIG_BPF_JIT=y
CONFIG_CFG80211=m
CONFIG_MAC80211=m
@@ -103,6 +134,7 @@
CONFIG_MTD_M25P80=y
CONFIG_MTD_SPI_NOR=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
CONFIG_VIRTIO_BLK=y
CONFIG_SRAM=y
# CONFIG_SCSI_PROC_FS is not set
@@ -120,7 +152,10 @@
CONFIG_PATA_PLATFORM=y
CONFIG_PATA_OF_PLATFORM=y
CONFIG_NETDEVICES=y
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
CONFIG_TUN=y
+CONFIG_VETH=m
CONFIG_VIRTIO_NET=y
CONFIG_AMD_XGBE=y
CONFIG_NET_XGENE=y
@@ -350,12 +385,16 @@
CONFIG_PWM_SAMSUNG=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS4_FS=y
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=y
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 61b4915..1737aec 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -22,7 +22,6 @@
#define __ARCH_WANT_KPROBES_INSN_SLOT
#define MAX_INSN_SIZE 1
-#define MAX_STACK_SIZE 128
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
@@ -47,7 +46,6 @@
struct prev_kprobe prev_kprobe;
struct kprobe_step_ctx ss_ctx;
struct pt_regs jprobe_saved_regs;
- char jprobes_stack[MAX_STACK_SIZE];
};
void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 5e834d1..c47257c9 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -265,22 +265,25 @@
static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n)
{
kasan_check_write(to, n);
- return __arch_copy_from_user(to, from, n);
+ check_object_size(to, n, false);
+ return __arch_copy_from_user(to, from, n);
}
static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n)
{
kasan_check_read(from, n);
- return __arch_copy_to_user(to, from, n);
+ check_object_size(from, n, true);
+ return __arch_copy_to_user(to, from, n);
}
static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
{
kasan_check_write(to, n);
- if (access_ok(VERIFY_READ, from, n))
+ if (access_ok(VERIFY_READ, from, n)) {
+ check_object_size(to, n, false);
n = __arch_copy_from_user(to, from, n);
- else /* security hole - plug it */
+ } else /* security hole - plug it */
memset(to, 0, n);
return n;
}
@@ -289,8 +292,10 @@
{
kasan_check_read(from, n);
- if (access_ok(VERIFY_WRITE, to, n))
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ check_object_size(from, n, true);
n = __arch_copy_to_user(to, from, n);
+ }
return n;
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 96e4a2b..441420c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -353,6 +353,8 @@
lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
b.eq el1_da
+ cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
+ b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
@@ -364,6 +366,11 @@
cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
b.ge el1_dbg
b el1_inv
+
+el1_ia:
+ /*
+ * Fall through to the Data abort case
+ */
el1_da:
/*
* Data abort handling
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 21ab5df..65d81f9 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -35,6 +35,7 @@
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/suspend.h>
+#include <asm/sysreg.h>
#include <asm/virt.h>
/*
@@ -217,12 +218,22 @@
set_pte(pte, __pte(virt_to_phys((void *)dst) |
pgprot_val(PAGE_KERNEL_EXEC)));
- /* Load our new page tables */
- asm volatile("msr ttbr0_el1, %0;"
- "isb;"
- "tlbi vmalle1is;"
- "dsb ish;"
- "isb" : : "r"(virt_to_phys(pgd)));
+ /*
+ * Load our new page tables. A strict BBM approach requires that we
+ * ensure that TLBs are free of any entries that may overlap with the
+ * global mappings we are about to install.
+ *
+ * For a real hibernate/resume cycle TTBR0 currently points to a zero
+ * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
+ * runtime services), while for a userspace-driven test_resume cycle it
+ * points to userspace page tables (and we must point it at a zero page
+ * ourselves). Elsewhere we only (un)install the idmap with preemption
+ * disabled, so T0SZ should be as required regardless.
+ */
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+ isb();
*phys_dst_addr = virt_to_phys((void *)dst);
@@ -394,6 +405,38 @@
void *, phys_addr_t, phys_addr_t);
/*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy of just the linear map, and use this when
+ * restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+ if (rc)
+ goto out;
+
+ /*
+ * Since we only copied the linear map, we need to find restore_pblist's
+ * linear map address.
+ */
+ lm_restore_pblist = LMADDR(restore_pblist);
+
+ /*
+ * We need a zero page that is zero before & after resume in order to
+ * break before make on the ttbr1 page tables.
+ */
+ zero_page = (void *)get_safe_page(GFP_ATOMIC);
+ if (!zero_page) {
+ pr_err("Failed to allocate zero page.");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /*
* Locate the exit code in the bottom-but-one page, so that *NULL
* still has disastrous affects.
*/
@@ -419,27 +462,6 @@
__flush_dcache_area(hibernate_exit, exit_size);
/*
- * Restoring the memory image will overwrite the ttbr1 page tables.
- * Create a second copy of just the linear map, and use this when
- * restoring.
- */
- tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!tmp_pg_dir) {
- pr_err("Failed to allocate memory for temporary page tables.");
- rc = -ENOMEM;
- goto out;
- }
- rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
- if (rc)
- goto out;
-
- /*
- * Since we only copied the linear map, we need to find restore_pblist's
- * linear map address.
- */
- lm_restore_pblist = LMADDR(restore_pblist);
-
- /*
* KASLR will cause the el2 vectors to be in a different location in
* the resumed kernel. Load hibernate's temporary copy into el2.
*
@@ -453,12 +475,6 @@
__hyp_set_vectors(el2_vectors);
}
- /*
- * We need a zero page that is zero before & after resume in order to
- * to break before make on the ttbr1 page tables.
- */
- zero_page = (void *)get_safe_page(GFP_ATOMIC);
-
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
resume_hdr.reenter_kernel, lm_restore_pblist,
resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index bf97685..c6b0f40 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -41,18 +41,6 @@
static void __kprobes
post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
-static inline unsigned long min_stack_size(unsigned long addr)
-{
- unsigned long size;
-
- if (on_irq_stack(addr, raw_smp_processor_id()))
- size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
- else
- size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
-
- return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
-}
-
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
@@ -489,20 +477,15 @@
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- long stack_ptr = kernel_stack_pointer(regs);
kcb->jprobe_saved_regs = *regs;
/*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
+ * Since we can't be sure where in the stack frame "stacked"
+ * pass-by-value arguments are stored, we just don't try to
+ * duplicate any of the stack. Do not use jprobes on functions that
+ * use more than 64 bytes (after padding each to an 8 byte boundary)
+ * of arguments, or pass individual arguments larger than 16 bytes.
*/
- kasan_disable_current();
- memcpy(kcb->jprobes_stack, (void *)stack_ptr,
- min_stack_size(stack_ptr));
- kasan_enable_current();
instruction_pointer_set(regs, (unsigned long) jp->entry);
preempt_disable();
@@ -554,10 +537,6 @@
}
unpause_graph_tracing();
*regs = kcb->jprobe_saved_regs;
- kasan_disable_current();
- memcpy((void *)stack_addr, kcb->jprobes_stack,
- min_stack_size(stack_addr));
- kasan_enable_current();
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 76a6d92..d93d433 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -661,9 +661,9 @@
acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
acpi_parse_gic_cpu_interface, 0);
- if (cpu_count > NR_CPUS)
- pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n",
- cpu_count, NR_CPUS);
+ if (cpu_count > nr_cpu_ids)
+ pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
+ cpu_count, nr_cpu_ids);
if (!bootcpu_valid) {
pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
@@ -677,7 +677,7 @@
* with entries in cpu_logical_map while initializing the cpus.
* If the cpu set-up fails, invalidate the cpu_logical_map entry.
*/
- for (i = 1; i < NR_CPUS; i++) {
+ for (i = 1; i < nr_cpu_ids; i++) {
if (cpu_logical_map(i) != INVALID_HWID) {
if (smp_cpu_setup(i))
cpu_logical_map(i) = INVALID_HWID;
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ae7855f..5a84b45 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -256,7 +256,7 @@
/*
* We must restore the 32-bit state before the sysregs, thanks
- * to Cortex-A57 erratum #852523.
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_guest_state(guest_ctxt);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b0b225c..e51367d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -823,14 +823,6 @@
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters. Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@
{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
/* ICC_SRE */
- { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c8beaa0..05d2bd7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -153,6 +153,11 @@
}
#endif
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
/*
* The kernel tried to access some page that wasn't present.
*/
@@ -161,8 +166,9 @@
{
/*
* Are we prepared to handle this kernel fault?
+ * We are almost certainly not prepared to handle instruction faults.
*/
- if (fixup_exception(regs))
+ if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
/*
@@ -267,7 +273,8 @@
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
- return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+ return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
+ (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -312,6 +319,9 @@
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+ if (is_el1_instruction_abort(esr))
+ die("Attempting to execute userspace memory", regs, esr);
+
if (!search_exception_tables(regs->pc))
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
index 2e221c5..f86918a 100644
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#include <linux/types.h>
+
/* H8/300 internal I/O functions */
#define __raw_readb __raw_readb
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6a15083..18ca6a9 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -52,6 +52,7 @@
select MODULES_USE_ELF_RELA
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 2189d5d..465c709 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -241,12 +241,18 @@
static inline unsigned long
__copy_to_user (void __user *to, const void *from, unsigned long count)
{
+ if (!__builtin_constant_p(count))
+ check_object_size(from, count, true);
+
return __copy_user(to, (__force void __user *) from, count);
}
static inline unsigned long
__copy_from_user (void *to, const void __user *from, unsigned long count)
{
+ if (!__builtin_constant_p(count))
+ check_object_size(to, count, false);
+
return __copy_user((__force void __user *) to, from, count);
}
@@ -258,8 +264,11 @@
const void *__cu_from = (from); \
long __cu_len = (n); \
\
- if (__access_ok(__cu_to, __cu_len, get_fs())) \
- __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \
+ if (__access_ok(__cu_to, __cu_len, get_fs())) { \
+ if (!__builtin_constant_p(n)) \
+ check_object_size(__cu_from, __cu_len, true); \
+ __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \
+ } \
__cu_len; \
})
@@ -270,8 +279,11 @@
long __cu_len = (n); \
\
__chk_user_ptr(__cu_from); \
- if (__access_ok(__cu_from, __cu_len, get_fs())) \
+ if (__access_ok(__cu_from, __cu_len, get_fs())) { \
+ if (!__builtin_constant_p(n)) \
+ check_object_size(__cu_to, __cu_len, false); \
__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \
+ } \
__cu_len; \
})
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 2dcee3a..9202f82 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -213,7 +213,6 @@
static inline void adjustformat(struct pt_regs *regs)
{
- ((struct switch_stack *)regs - 1)->a5 = current->mm->start_data;
/*
* set format byte to make stack appear modulo 4, which it will
* be when doing the rte
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 11fa51c..c0ec116 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -390,7 +390,6 @@
free_all_bootmem();
mem_init_print_info(NULL);
- show_mem(0);
}
void free_initmem(void)
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 6eb52b9..e788515 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1642,8 +1642,14 @@
preempt_disable();
if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
- if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
- kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
+ if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
+ kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
+ kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
+ __func__, va, vcpu, read_c0_entryhi());
+ er = EMULATE_FAIL;
+ preempt_enable();
+ goto done;
+ }
} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
int index;
@@ -1680,12 +1686,18 @@
run, vcpu);
preempt_enable();
goto dont_update_pc;
- } else {
- /*
- * We fault an entry from the guest tlb to the
- * shadow host TLB
- */
- kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+ }
+ /*
+ * We fault an entry from the guest tlb to the
+ * shadow host TLB
+ */
+ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+ kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+ __func__, va, index, vcpu,
+ read_c0_entryhi());
+ er = EMULATE_FAIL;
+ preempt_enable();
+ goto done;
}
}
} else {
@@ -2659,7 +2671,12 @@
* OK we have a Guest TLB entry, now inject it into the
* shadow host TLB
*/
- kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+ kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+ __func__, va, index, vcpu,
+ read_c0_entryhi());
+ er = EMULATE_FAIL;
+ }
}
}
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 57319ee5..121008c 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -40,7 +40,7 @@
srcu_idx = srcu_read_lock(&kvm->srcu);
pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_pfn(pfn)) {
+ if (is_error_noslot_pfn(pfn)) {
kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
err = -EFAULT;
goto out;
@@ -99,7 +99,7 @@
}
gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
- if (gfn >= kvm->arch.guest_pmap_npages) {
+ if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
gfn, badvaddr);
kvm_mips_dump_host_tlbs();
@@ -138,35 +138,49 @@
unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
struct kvm *kvm = vcpu->kvm;
kvm_pfn_t pfn0, pfn1;
+ gfn_t gfn0, gfn1;
+ long tlb_lo[2];
int ret;
- if ((tlb->tlb_hi & VPN2_MASK) == 0) {
- pfn0 = 0;
- pfn1 = 0;
- } else {
- if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
- >> PAGE_SHIFT) < 0)
- return -1;
+ tlb_lo[0] = tlb->tlb_lo[0];
+ tlb_lo[1] = tlb->tlb_lo[1];
- if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
- >> PAGE_SHIFT) < 0)
- return -1;
+ /*
+ * The commpage address must not be mapped to anything else if the guest
+ * TLB contains entries nearby, or commpage accesses will break.
+ */
+ if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
+ VPN2_MASK & (PAGE_MASK << 1)))
+ tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
- pfn0 = kvm->arch.guest_pmap[
- mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
- pfn1 = kvm->arch.guest_pmap[
- mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
+ gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
+ gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
+ if (gfn0 >= kvm->arch.guest_pmap_npages ||
+ gfn1 >= kvm->arch.guest_pmap_npages) {
+ kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
+ __func__, gfn0, gfn1, tlb->tlb_hi);
+ kvm_mips_dump_guest_tlbs(vcpu);
+ return -1;
}
+ if (kvm_mips_map_page(kvm, gfn0) < 0)
+ return -1;
+
+ if (kvm_mips_map_page(kvm, gfn1) < 0)
+ return -1;
+
+ pfn0 = kvm->arch.guest_pmap[gfn0];
+ pfn1 = kvm->arch.guest_pmap[gfn1];
+
/* Get attributes from the Guest TLB */
entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
- (tlb->tlb_lo[0] & ENTRYLO_D) |
- (tlb->tlb_lo[0] & ENTRYLO_V);
+ (tlb_lo[0] & ENTRYLO_D) |
+ (tlb_lo[0] & ENTRYLO_V);
entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
- (tlb->tlb_lo[1] & ENTRYLO_D) |
- (tlb->tlb_lo[1] & ENTRYLO_V);
+ (tlb_lo[1] & ENTRYLO_D) |
+ (tlb_lo[1] & ENTRYLO_V);
kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
tlb->tlb_lo[0], tlb->tlb_lo[1]);
@@ -354,9 +368,15 @@
local_irq_restore(flags);
return KVM_INVALID_INST;
}
- kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
- &vcpu->arch.
- guest_tlb[index]);
+ if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+ &vcpu->arch.guest_tlb[index])) {
+ kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
+ __func__, opc, index, vcpu,
+ read_c0_entryhi());
+ kvm_mips_dump_guest_tlbs(vcpu);
+ local_irq_restore(flags);
+ return KVM_INVALID_INST;
+ }
inst = *(opc);
}
local_irq_restore(flags);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ec4047e..927d2ab 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -166,6 +166,7 @@
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select GENERIC_CPU_AUTOPROBE
select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_ARCH_HARDENED_USERCOPY
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ca25454..1934707 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -66,29 +66,28 @@
UTS_MACHINE := $(OLDARCH)
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC += -mno-strict-align
-endif
-override AS += -mlittle-endian
override LD += -EL
-override CROSS32CC += -mlittle-endian
override CROSS32AS += -mlittle-endian
LDEMULATION := lppc
GNUTARGET := powerpcle
MULTIPLEWORD := -mno-multiple
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC += -mbig-endian
-override AS += -mbig-endian
-endif
override LD += -EB
LDEMULATION := ppc
GNUTARGET := powerpc
MULTIPLEWORD := -mmultiple
endif
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+ifneq ($(cc-name),clang)
+ cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+
ifeq ($(HAS_BIARCH),y)
override AS += -a$(CONFIG_WORD_SIZE)
override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -232,6 +231,9 @@
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index bfe3d37..9fa046d 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -4,6 +4,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/cpufeature.h>
#include <asm/switch_to.h>
#define CHKSUM_BLOCK_SIZE 1
@@ -157,7 +158,7 @@
crypto_unregister_shash(&alg);
}
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
module_exit(crc32c_vpmsum_mod_fini);
MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 3d7fc06..01b8a13 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -19,4 +19,17 @@
#endif
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
+#endif /* CONFIG_PPC_P7_NAP */
+
#endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 57fec8a..ddf54f5 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -186,6 +186,7 @@
#ifndef __ASSEMBLY__
void apply_feature_fixups(void);
+void setup_feature_keys(void);
#endif
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 0a74ebe..17c8380 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -75,14 +75,6 @@
static inline void __giveup_spe(struct task_struct *t) { }
#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
static inline void clear_task_ebb(struct task_struct *t)
{
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index b7c20f0..c1dc6c1 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -310,10 +310,15 @@
{
unsigned long over;
- if (access_ok(VERIFY_READ, from, n))
+ if (access_ok(VERIFY_READ, from, n)) {
+ if (!__builtin_constant_p(n))
+ check_object_size(to, n, false);
return __copy_tofrom_user((__force void __user *)to, from, n);
+ }
if ((unsigned long)from < TASK_SIZE) {
over = (unsigned long)from + n - TASK_SIZE;
+ if (!__builtin_constant_p(n - over))
+ check_object_size(to, n - over, false);
return __copy_tofrom_user((__force void __user *)to, from,
n - over) + over;
}
@@ -325,10 +330,15 @@
{
unsigned long over;
- if (access_ok(VERIFY_WRITE, to, n))
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ if (!__builtin_constant_p(n))
+ check_object_size(from, n, true);
return __copy_tofrom_user(to, (__force void __user *)from, n);
+ }
if ((unsigned long)to < TASK_SIZE) {
over = (unsigned long)to + n - TASK_SIZE;
+ if (!__builtin_constant_p(n))
+ check_object_size(from, n - over, true);
return __copy_tofrom_user(to, (__force void __user *)from,
n - over) + over;
}
@@ -372,6 +382,10 @@
if (ret == 0)
return 0;
}
+
+ if (!__builtin_constant_p(n))
+ check_object_size(to, n, false);
+
return __copy_tofrom_user((__force void __user *)to, from, n);
}
@@ -398,6 +412,9 @@
if (ret == 0)
return 0;
}
+ if (!__builtin_constant_p(n))
+ check_object_size(from, n, true);
+
return __copy_tofrom_user(to, (__force const void __user *)from, n);
}
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index f5f729c..f0b2385 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -159,6 +159,8 @@
extern void xics_kexec_teardown_cpu(int secondary);
extern void xics_migrate_irqs_away(void);
extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
#ifdef CONFIG_SMP
extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
unsigned int strict_check);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c9bc78e..7429556 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -168,10 +168,10 @@
int n = 0, l = 0;
char buffer[128];
- n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+ n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
- pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+ pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41091fd..df6d45e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -144,29 +144,14 @@
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
+ /*
+ * Running native on arch 2.06 or later, we may wake up from winkle
+ * inside machine check. If so, the last bit of HSPRG0 will be set
+ * to 1. Hence clear it unconditionally.
*/
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- beq 9f
-
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal. let's just stay stuck here */
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- bgt cr1,.
-9:
- OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+ GET_PACA(r13)
+ clrrdi r13,r13,1
+ SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
@@ -1273,25 +1258,51 @@
* Check if thread was in power saving mode. We come here when any
* of the following is true:
* a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss or
- * supervisor state loss
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
*
- * Go back to nap again if (b) is true.
+ * Go back to nap/sleep/winkle mode again if (b) is true.
*/
rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
beq 4f /* No, it wasn;t */
/* Thread was in power saving mode. Go back to nap again. */
cmpwi r11,2
- bne 3f
- /* Supervisor state loss */
+ blt 3f
+ /* Supervisor/Hypervisor state loss */
li r0,1
stb r0,PACA_NAPSTATELOST(r13)
3: bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
- li r3,PNV_THREAD_NAP
- b pnv_enter_arch207_idle_mode
+ /*
+ * Check what idle state this CPU was in and go back to same mode
+ * again.
+ */
+ lbz r3,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi r3,PNV_THREAD_NAP
+ bgt 10f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+10:
+ cmpwi r3,PNV_THREAD_SLEEP
+ bgt 2f
+ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
+
+2:
+ /*
+ * Go back to winkle. Please note that this thread was woken up in
+ * machine check from winkle and has not restored the per-subcore
+ * state. Hence before going back to winkle, set the last bit of HSPRG0
+ * to 1. This will make sure that if this thread gets woken up
+ * again at reset vector 0x100 then it will get a chance to restore
+ * the subcore state.
+ */
+ ori r13,r13,1
+ SET_PACA(r13)
+ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+ /* No return */
4:
#endif
/*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index ba79d15..2265c63 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -44,18 +44,6 @@
PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
PSSCR_MTL_MASK
-/* Idle state entry routines */
-
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
- ptesync; \
- ld r0,0(r1); \
-1: cmp cr0,r0,r0; \
- bne 1b; \
- IDLE_INST; \
- b .
-
.text
/*
@@ -363,8 +351,8 @@
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
_GLOBAL(pnv_restore_hyp_resource)
- ld r2,PACATOC(r13);
BEGIN_FTR_SECTION
+ ld r2,PACATOC(r13);
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
@@ -395,6 +383,9 @@
*/
clrldi r5,r13,63
clrrdi r13,r13,1
+
+ /* Now that we are sure r13 is corrected, load TOC */
+ ld r2,PACATOC(r13);
cmpwi cr4,r5,1
mtspr SPRN_HSPRG0,r13
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ef267fd..5e7ece0 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -92,7 +92,8 @@
mce->in_use = 1;
mce->initiator = MCE_INITIATOR_CPU;
- if (handled)
+ /* Mark it recovered if we have handled it and MSR(RI=1). */
+ if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a5c0153..7fdf324 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -78,6 +78,7 @@
static int get_phb_number(struct device_node *dn)
{
int ret, phb_id = -1;
+ u32 prop_32;
u64 prop;
/*
@@ -86,8 +87,10 @@
* reading "ibm,opal-phbid", only present in OPAL environment.
*/
ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
- if (ret)
- ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+ if (ret) {
+ ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+ prop = prop_32;
+ }
if (!ret)
phb_id = (int)(prop & (MAX_PHBS - 1));
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 58ccf86..9ee2623 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1074,26 +1074,6 @@
#endif
}
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * Process self tracing is not yet supported through
- * ptrace interface. Ptrace generic code should have
- * prevented this from happening in the first place.
- * Warn once here with the message, if some how it
- * is attempted.
- */
- WARN_ONCE(tsk == current,
- "Not expecting ptrace on self: TM regs may be incorrect\n");
-
- /*
- * If task is not current, it should have been flushed
- * already to it's thread_struct during __switch_to().
- */
-}
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6ee4b72..4e74fc5 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2940,7 +2940,7 @@
/* Don't print anything after quiesce under OPAL, it crashes OFW */
if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() ...\n");
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%x\n", hdr);
}
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 4f3c575..bf91658 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -38,6 +38,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
+#include <asm/tm.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -118,6 +119,24 @@
REG_OFFSET_END,
};
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * its thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state.
+ */
+
+ if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c3e861d..24ec3ea 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -93,15 +93,16 @@
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
*
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
*/
extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ /* Configure static keys first, now that we're relocated. */
+ setup_feature_keys();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eafb9a7..7ac8e6e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -300,6 +300,7 @@
/* Apply all the dynamic patching */
apply_feature_fixups();
+ setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6767605..4111d30 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/memblock.h>
+#include <asm/cpu_has_feature.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index cbabd14..78a7449 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -30,7 +30,7 @@
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso32ld)
# strip rule for the .so file
@@ -39,12 +39,12 @@
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index c710802..366ae09 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -23,7 +23,7 @@
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
$(call if_changed,vdso64ld)
# strip rule for the .so file
@@ -32,12 +32,12 @@
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a75ba38..05aa113 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1329,20 +1329,16 @@
xics->kvm = kvm;
/* Already there ? */
- mutex_lock(&kvm->lock);
if (kvm->arch.xics)
ret = -EEXIST;
else
kvm->arch.xics = xics;
- mutex_unlock(&kvm->lock);
if (ret) {
kfree(xics);
return ret;
}
- xics_debugfs_init(xics);
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
/* Enable real mode support */
@@ -1354,9 +1350,17 @@
return 0;
}
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+ struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
+
+ xics_debugfs_init(xics);
+}
+
struct kvm_device_ops kvm_xics_ops = {
.name = "kvm-xics",
.create = kvmppc_xics_create,
+ .init = kvmppc_xics_init,
.destroy = kvmppc_xics_free,
.set_attr = xics_set_attr,
.get_attr = xics_get_attr,
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index d90870a..0a57fe6 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,8 +127,9 @@
stw r7,12(r1)
stw r8,8(r1)
- andi. r0,r4,1 /* is destination address even ? */
- cmplwi cr7,r0,0
+ rlwinm r0,r4,3,0x8
+ rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */
+ cmplwi cr7,r0,0 /* is destination address even ? */
addic r12,r6,0
addi r6,r4,-4
neg r0,r4
@@ -237,7 +238,7 @@
66: addze r3,r12
addi r1,r1,16
beqlr+ cr7
- rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */
+ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
/* read fault */
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 74145f0..043415f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -188,7 +188,10 @@
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
do_final_fixups();
+}
+void __init setup_feature_keys(void)
+{
/*
* Initialise jump label. This causes all the cpu/mmu_has_feature()
* checks to take on their correct polarity based on the current set of
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5be15cf..2975754 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -496,8 +496,10 @@
gang = alloc_spu_gang();
SPUFS_I(inode)->i_ctx = NULL;
SPUFS_I(inode)->i_gang = gang;
- if (!gang)
+ if (!gang) {
+ ret = -ENOMEM;
goto out_iput;
+ }
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 309d9cc..c61667e 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -187,6 +187,11 @@
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
dev->dev.archdata.dma_ops = &dma_direct_ops;
+ /*
+ * Set the coherent DMA mask to prevent the iommu
+ * being used unnecessarily
+ */
+ dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
return;
}
#endif
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index e505223b..ed8bba6 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -228,7 +228,8 @@
}
/* Install interrupt handler */
- rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+ rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+ "opal", NULL);
if (rc) {
irq_dispose_mapping(virq);
pr_warn("Error %d requesting irq %d (0x%x)\n",
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 8b4fc68..6c9a65b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -399,6 +399,7 @@
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
+ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6b95283..fd9444f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -111,10 +111,17 @@
}
early_param("iommu", iommu_setup);
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
- return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
- (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+ /*
+ * WARNING: We cannot rely on the resource flags. The Linux PCI
+ * allocation code sometimes decides to put a 64-bit prefetchable
+ * BAR in the 32-bit window, so we have to compare the addresses.
+ *
+ * For simplicity we only test resource start.
+ */
+ return (r->start >= phb->ioda.m64_base &&
+ r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
}
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@@ -229,7 +236,7 @@
sgsz = phb->ioda.m64_segsize;
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
r = &pdev->resource[i];
- if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+ if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -1877,7 +1884,7 @@
unsigned shift, unsigned long index,
unsigned long npages)
{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+ __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
@@ -2863,7 +2870,7 @@
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
+ if (!pnv_pci_is_m64(phb, res)) {
dev_warn(&pdev->dev, "Don't support SR-IOV with"
" non M64 VF BAR%d: %pR. \n",
i, res);
@@ -2958,7 +2965,7 @@
index++;
}
} else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
+ !pnv_pci_is_m64(phb, res)) {
region.start = res->start -
phb->hose->mem_offset[0] -
phb->ioda.m32_pci_base;
@@ -3083,9 +3090,12 @@
bridge = bridge->bus->self;
}
- /* We fail back to M32 if M64 isn't supported */
- if (phb->ioda.m64_segsize &&
- pnv_pci_is_mem_pref_64(type))
+ /*
+ * We fall back to M32 if M64 isn't supported. We enforce the M64
+ * alignment for any 64-bit resource, PCIe doesn't care and
+ * bridges only do 64-bit prefetchable anyway.
+ */
+ if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
@@ -3125,7 +3135,7 @@
w = NULL;
if (r->flags & type & IORESOURCE_IO)
w = &hose->io_resource;
- else if (pnv_pci_is_mem_pref_64(r->flags) &&
+ else if (pnv_pci_is_m64(phb, r) &&
(type & IORESOURCE_PREFETCH) &&
phb->ioda.m64_segsize)
w = &hose->mem_resources[1];
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 43f7beb..76ec104 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -320,19 +320,6 @@
return dlpar_update_device_tree_lmb(lmb);
}
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
- unsigned long section_nr;
- struct mem_section *mem_sect;
- struct memory_block *mem_block;
-
- section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
- mem_sect = __nr_to_section(section_nr);
-
- mem_block = find_memory_block(mem_sect);
- return mem_block;
-}
-
#ifdef CONFIG_MEMORY_HOTREMOVE
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
@@ -420,6 +407,19 @@
static int dlpar_add_lmb(struct of_drconf_cell *);
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+ unsigned long section_nr;
+ struct mem_section *mem_sect;
+ struct memory_block *mem_block;
+
+ section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+ mem_sect = __nr_to_section(section_nr);
+
+ mem_block = find_memory_block(mem_sect);
+ return mem_block;
+}
+
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 0031eda..385e7aa 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,6 +1,7 @@
config PPC_XICS
def_bool n
select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
config PPC_ICP_NATIVE
def_bool n
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 27c936c..1c6bf4b 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -156,7 +156,9 @@
.irq_mask = ics_opal_mask_irq,
.irq_unmask = ics_opal_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_opal_set_affinity
+ .irq_set_affinity = ics_opal_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_opal_map(struct ics *ics, unsigned int virq);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 3854dd4..78ee5c7 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -163,7 +163,9 @@
.irq_mask = ics_rtas_mask_irq,
.irq_unmask = ics_rtas_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_rtas_set_affinity
+ .irq_set_affinity = ics_rtas_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_rtas_map(struct ics *ics, unsigned int virq)
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a795a5f..9d530f4 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -328,8 +328,12 @@
pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
- /* They aren't all level sensitive but we just don't really know */
- irq_set_status_flags(virq, IRQ_LEVEL);
+ /*
+ * Mark interrupts as edge sensitive by default so that resend
+ * actually works. The device-tree parsing will turn the LSIs
+ * back to level.
+ */
+ irq_clear_status_flags(virq, IRQ_LEVEL);
/* Don't call into ICS for IPIs */
if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
- /* Current xics implementation translates everything
- * to level. It is not technically right for MSIs but this
- * is irrelevant at this point. We might get smarter in the future
- */
*out_hwirq = intspec[0];
- *out_flags = IRQ_TYPE_LEVEL_LOW;
+ /*
+ * If intsize is at least 2, we look for the type in the second cell,
+ * we assume the LSB indicates a level interrupt.
+ */
+ if (intsize > 1) {
+ if (intspec[1] & 1)
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+ else
+ *out_flags = IRQ_TYPE_EDGE_RISING;
+ } else
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+ return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+ /*
+ * We only support these. This has really no effect other than setting
+ * the corresponding descriptor bits mind you but those will in turn
+ * affect the resend function when re-enabling an edge interrupt.
+ *
+ * Set the default to edge as explained in map().
+ */
+ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+ flow_type = IRQ_TYPE_EDGE_RISING;
+
+ if (flow_type != IRQ_TYPE_EDGE_RISING &&
+ flow_type != IRQ_TYPE_LEVEL_LOW)
+ return -EINVAL;
+
+ irqd_set_trigger_type(d, flow_type);
+
+ return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+ /*
+ * We need to push a dummy CPPR when retriggering, since the subsequent
+ * EOI will try to pop it. Passing 0 works, as the function hard codes
+ * the priority value anyway.
+ */
+ xics_push_cppr(0);
+
+ /* Tell the core to do a soft retrigger */
return 0;
}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9e607bf..e751fe2 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_EARLY_PFN_TO_NID
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_JUMP_LABEL
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
@@ -871,4 +872,17 @@
Select this option if you want to run the kernel as a guest under
the KVM hypervisor.
+config S390_GUEST_OLD_TRANSPORT
+ def_bool y
+ prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+ depends on S390_GUEST
+ help
+ Enable this option to add support for the old s390-virtio
+ transport (i.e. virtio devices NOT based on virtio-ccw). This
+ type of virtio device is only available on the experimental
+ kuli userspace or with old (< 2.6) qemu. If you are running
+ with a modern version of qemu (which supports virtio-ccw since
+ 1.4 and uses it by default since version 2.4), you probably won't
+ need this.
+
endmenu
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index f86a4ee..28c4f96 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -21,16 +21,21 @@
lg %r15,.Lstack-.LPG1(%r13)
aghi %r15,-160
brasl %r14,decompress_kernel
- # setup registers for memory mover & branch to target
+ # Set up registers for memory mover. We move the decompressed image to
+ # 0x11000, starting at offset 0x11000 in the decompressed image so
+ # that code living at 0x11000 in the image will end up at 0x11000 in
+ # memory.
lgr %r4,%r2
lg %r2,.Loffset-.LPG1(%r13)
la %r4,0(%r2,%r4)
lg %r3,.Lmvsize-.LPG1(%r13)
lgr %r5,%r3
- # move the memory mover someplace safe
+ # Move the memory mover someplace safe so it doesn't overwrite itself.
la %r1,0x200
mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
- # decompress image is started at 0x11000
+ # When the memory mover is done we pass control to
+ # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in
+ # the decompressed image.
lgr %r6,%r2
br %r1
mover:
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 889ea34..26e0c7f 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -678,7 +678,7 @@
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 1bcfd76..24879da 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -616,7 +616,7 @@
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 13ff090..a5c1e5f 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -615,7 +615,7 @@
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 577ae1d..2bad9d8 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -51,6 +51,9 @@
struct kernel_fpu vxstate; \
unsigned long prealign, aligned, remaining; \
\
+ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
+ return ___crc32_sw(crc, data, datalen); \
+ \
if ((unsigned long)data & VX_ALIGN_MASK) { \
prealign = VX_ALIGNMENT - \
((unsigned long)data & VX_ALIGN_MASK); \
@@ -59,9 +62,6 @@
data = (void *)((unsigned long)data + prealign); \
} \
\
- if (datalen < VX_MIN_LEN) \
- return ___crc32_sw(crc, data, datalen); \
- \
aligned = datalen & ~VX_ALIGN_MASK; \
remaining = datalen & VX_ALIGN_MASK; \
\
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index ccccebe..73610f2 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -234,7 +234,7 @@
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_POWERPC is not set
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 56e4d82..4431905 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -309,7 +309,9 @@
l %r15,.Lstack-.LPG0(%r13)
ahi %r15,-STACK_FRAME_OVERHEAD
brasl %r14,verify_facilities
- /* Continue with startup code in head64.S */
+# For uncompressed images, continue in
+# arch/s390/kernel/head64.S. For compressed images, continue in
+# arch/s390/boot/compressed/head.S.
jg startup_continue
.Lstack:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3ae48..f142215 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1672,6 +1672,7 @@
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
+ kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2361,8 +2362,10 @@
rc = gmap_mprotect_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),
PAGE_SIZE * 2, PROT_WRITE);
- if (rc)
+ if (rc) {
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
return rc;
+ }
goto retry;
}
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index e390bbb..48352bf 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -237,11 +237,10 @@
EXPORT_SYMBOL(strrchr);
static inline int clcle(const char *s1, unsigned long l1,
- const char *s2, unsigned long l2,
- int *diff)
+ const char *s2, unsigned long l2)
{
register unsigned long r2 asm("2") = (unsigned long) s1;
- register unsigned long r3 asm("3") = (unsigned long) l2;
+ register unsigned long r3 asm("3") = (unsigned long) l1;
register unsigned long r4 asm("4") = (unsigned long) s2;
register unsigned long r5 asm("5") = (unsigned long) l2;
int cc;
@@ -252,7 +251,6 @@
" srl %0,28"
: "=&d" (cc), "+a" (r2), "+a" (r3),
"+a" (r4), "+a" (r5) : : "cc");
- *diff = *(char *)r2 - *(char *)r4;
return cc;
}
@@ -270,9 +268,9 @@
return (char *) s1;
l1 = __strend(s1) - s1;
while (l1-- >= l2) {
- int cc, dummy;
+ int cc;
- cc = clcle(s1, l1, s2, l2, &dummy);
+ cc = clcle(s1, l2, s2, l2);
if (!cc)
return (char *) s1;
s1++;
@@ -313,11 +311,11 @@
*/
int memcmp(const void *cs, const void *ct, size_t n)
{
- int ret, diff;
+ int ret;
- ret = clcle(cs, n, ct, n, &diff);
+ ret = clcle(cs, n, ct, n);
if (ret)
- ret = diff;
+ ret = ret == 1 ? -1 : 1;
return ret;
}
EXPORT_SYMBOL(memcmp);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d965961..f481fcd 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -104,6 +104,7 @@
unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
{
+ check_object_size(to, n, false);
if (static_branch_likely(&have_mvcos))
return copy_from_user_mvcos(to, from, n);
return copy_from_user_mvcp(to, from, n);
@@ -177,6 +178,7 @@
unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
if (static_branch_likely(&have_mvcos))
return copy_to_user_mvcos(to, from, n);
return copy_to_user_mvcs(to, from, n);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 7104ffb..af7cf28c 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -252,6 +252,8 @@
int rc = -EINVAL;
pgd_t *pgdp;
+ if (addr == end)
+ return 0;
if (end >= MODULES_END)
return -EINVAL;
mutex_lock(&cpa_mutex);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 546293d..59b0960 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@
select OLD_SIGSUSPEND
select ARCH_HAS_SG_CHAIN
select CPU_NO_EFFICIENT_FFS
+ select HAVE_ARCH_HARDENED_USERCOPY
config SPARC32
def_bool !64BIT
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 57aca27..341a5a1 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -248,22 +248,28 @@
static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
{
- if (n && __access_ok((unsigned long) to, n))
+ if (n && __access_ok((unsigned long) to, n)) {
+ if (!__builtin_constant_p(n))
+ check_object_size(from, n, true);
return __copy_user(to, (__force void __user *) from, n);
- else
+ } else
return n;
}
static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
{
+ if (!__builtin_constant_p(n))
+ check_object_size(from, n, true);
return __copy_user(to, (__force void __user *) from, n);
}
static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (n && __access_ok((unsigned long) from, n))
+ if (n && __access_ok((unsigned long) from, n)) {
+ if (!__builtin_constant_p(n))
+ check_object_size(to, n, false);
return __copy_user((__force void __user *) to, from, n);
- else
+ } else
return n;
}
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index e9a51d6..8bda94f 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -210,8 +210,12 @@
static inline unsigned long __must_check
copy_from_user(void *to, const void __user *from, unsigned long size)
{
- unsigned long ret = ___copy_from_user(to, from, size);
+ unsigned long ret;
+ if (!__builtin_constant_p(size))
+ check_object_size(to, size, false);
+
+ ret = ___copy_from_user(to, from, size);
if (unlikely(ret))
ret = copy_from_user_fixup(to, from, size);
@@ -227,8 +231,11 @@
static inline unsigned long __must_check
copy_to_user(void __user *to, const void *from, unsigned long size)
{
- unsigned long ret = ___copy_to_user(to, from, size);
+ unsigned long ret;
+ if (!__builtin_constant_p(size))
+ check_object_size(from, size, true);
+ ret = ___copy_to_user(to, from, size);
if (unlikely(ret))
ret = copy_to_user_fixup(to, from, size);
return ret;
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index e35632e..62dfc64 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -98,7 +98,7 @@
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
- bool write, bool foreign)
+ bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5c6e747..c580d8c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_AOUT if X86_32
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
@@ -91,6 +92,7 @@
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_EBPF_JIT if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index fe91c25..77f28ce 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
+CFLAGS_syscall_64.o += -Wno-override-init
+CFLAGS_syscall_32.o += -Wno-override-init
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875..d172c61 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,11 +288,15 @@
jne opportunistic_sysret_failed
/*
- * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET. This would cause an infinite loop whenever #DB happens
- * with register state that satisfies the opportunistic SYSRET
- * conditions. For example, single-stepping this user code:
+ * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+ * restore RF properly. If the slowpath sets it for whatever reason, we
+ * need to restore it correctly.
+ *
+ * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+ * trap from userspace immediately after SYSRET. This would cause an
+ * infinite loop whenever #DB happens with register state that satisfies
+ * the opportunistic SYSRET conditions. For example, single-stepping
+ * this user code:
*
* movq $stuck_here, %rcx
* pushfq
@@ -601,9 +605,20 @@
.endm
#endif
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY .popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
.macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
.endm
#ifdef CONFIG_SMP
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69db..9d35ec0 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@
}
}
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
+ .enable_box = snb_uncore_msr_enable_box,
.exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@
}
}
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@
static struct intel_uncore_ops skl_uncore_msr_ops = {
.init_box = skl_uncore_msr_init_box,
+ .enable_box = skl_uncore_msr_enable_box,
.exit_box = skl_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e540..8aee83b 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2626,7 +2626,7 @@
static struct intel_uncore_type hswep_uncore_ha = {
.name = "ha",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@
static struct intel_uncore_type hswep_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
@@ -2691,7 +2691,7 @@
static struct intel_uncore_type hswep_uncore_qpi = {
.name = "qpi",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 3,
.perf_ctr_bits = 48,
.perf_ctr = SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@
static struct intel_uncore_type hswep_uncore_r3qpi = {
.name = "r3qpi",
- .num_counters = 4,
+ .num_counters = 3,
.num_boxes = 3,
.perf_ctr_bits = 44,
.constraints = hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5befd4..1243577 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -135,6 +135,7 @@
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
+extern void lapic_update_tsc_freq(void);
extern int APIC_init_uniprocessor(void);
#ifdef CONFIG_X86_64
@@ -170,6 +171,7 @@
static inline void disable_local_APIC(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
+static inline void lapic_update_tsc_freq(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043..59405a2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
- /*
- * irq_tlb_count is double-counted in irq_call_count, so it must be
- * subtracted from irq_call_count when displaying irq_call_count
- */
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 2230420..737da62 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,10 +5,10 @@
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long pmd_flag; /* page flag for PMD entry */
- bool kernel_mapping; /* kernel mapping or ident mapping */
+ unsigned long offset; /* ident mapping offset */
};
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end);
+ unsigned long pstart, unsigned long pend);
#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e8ec7a..1cc82ec 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@@ -156,7 +156,7 @@
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
#define SWP_TYPE_BITS 5
/* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 9c6b890..b2988c0 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,7 +58,15 @@
extern unsigned char secondary_startup_64[];
#endif
+static inline size_t real_mode_size_needed(void)
+{
+ if (real_mode_header)
+ return 0; /* already allocated. */
+
+ return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
void reserve_real_mode(void);
-void setup_real_mode(void);
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 84b5984..8b7c8d8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -176,6 +176,50 @@
return sp;
}
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ * 1 if within a frame
+ * -1 if placed across a frame boundary (or outside stack)
+ * 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+ const void *frame = NULL;
+ const void *oldframe;
+
+ oldframe = __builtin_frame_address(1);
+ if (oldframe)
+ frame = __builtin_frame_address(2);
+ /*
+ * low ----------------------------------------------> high
+ * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+ * ^----------------^
+ * allow copies only within here
+ */
+ while (stack <= frame && frame < stackend) {
+ /*
+ * If obj + len extends past the last frame, this
+ * check won't pass and the next frame will be 0,
+ * causing us to bail out and correctly report
+ * the copy as invalid.
+ */
+ if (obj + len <= frame)
+ return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ oldframe = frame;
+ frame = *(const void * const *)frame;
+ }
+ return -1;
+#else
+ return 0;
+#endif
+}
+
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4e5be94..6fa8594 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@
static inline void __native_flush_tlb(void)
{
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+ * back:
+ */
+ preempt_disable();
native_write_cr3(native_read_cr3());
+ preempt_enable();
}
static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c03bfb6..a0ae610 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -761,9 +761,10 @@
* case, and do only runtime checking for non-constant sizes.
*/
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(to, n, false);
n = _copy_from_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_from_user_overflow();
else
__copy_from_user_overflow(sz, n);
@@ -781,9 +782,10 @@
might_fault();
/* See the comment in copy_from_user() above. */
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(from, n, true);
n = _copy_to_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_to_user_overflow();
else
__copy_to_user_overflow(sz, n);
@@ -812,21 +814,21 @@
#define user_access_begin() __uaccess_begin()
#define user_access_end() __uaccess_end()
-#define unsafe_put_user(x, ptr) \
-({ \
+#define unsafe_put_user(x, ptr, err_label) \
+do { \
int __pu_err; \
__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
- __builtin_expect(__pu_err, 0); \
-})
+ if (unlikely(__pu_err)) goto err_label; \
+} while (0)
-#define unsafe_get_user(x, ptr) \
-({ \
+#define unsafe_get_user(x, ptr, err_label) \
+do { \
int __gu_err; \
unsigned long __gu_val; \
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+ if (unlikely(__gu_err)) goto err_label; \
+} while (0)
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 4b32da24..7d3bdd1 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -37,6 +37,7 @@
static __always_inline unsigned long __must_check
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
return __copy_to_user_ll(to, from, n);
}
@@ -95,6 +96,7 @@
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
+ check_object_size(to, n, false);
if (__builtin_constant_p(n)) {
unsigned long ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 2eac2aa..673059a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -54,6 +54,7 @@
{
int ret = 0;
+ check_object_size(dst, size, false);
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -119,6 +120,7 @@
{
int ret = 0;
+ check_object_size(src, size, true);
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index c852590..e652a7c 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@
u16 nasid; /* HNasid */
u16 sockid; /* Socket ID, high bits of APIC ID */
u16 pnode; /* Index to MMR and GRU spaces */
- u32 pxm; /* ACPI proximity domain number */
+ u32 unused2;
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};
@@ -88,7 +88,8 @@
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
-#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
+#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
+#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20abd91..cea4fc1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -313,7 +313,7 @@
/* Clock divisor */
#define APIC_DIVISOR 16
-#define TSC_DIVISOR 32
+#define TSC_DIVISOR 8
/*
* This function sets up the local APIC timer, with a timeout of
@@ -565,13 +565,37 @@
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
- (tsc_khz / TSC_DIVISOR) * 1000,
+ tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
} else
clockevents_register_device(levt);
}
/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return;
+
+ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+ /*
+ * The clockevent device's ->mult and ->shift can both be
+ * changed. In order to avoid races, schedule the frequency
+ * update code on each CPU.
+ */
+ on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
+/*
* In this functions we calibrate APIC bus clocks to the external timer.
*
* We want to do the calibration only once since we want to have local timer
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa6..54f35d9 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -168,7 +168,7 @@
return 0;
}
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;
@@ -186,13 +186,18 @@
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
+ int ret;
if (!x2apic_mode)
return 0;
+ ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+ x2apic_prepare_cpu, x2apic_dead_cpu);
+ if (ret < 0) {
+ pr_err("Failed to register X2APIC_PREPARE\n");
+ return 0;
+ }
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
- cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
- x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59ad..cb0673c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
+ if (numa_off) {
+ pr_err("UV: NUMA is off, disabling UV support\n");
+ return 0;
+ }
+
/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
@@ -325,7 +330,7 @@
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
- int bytes, i, sid, lsid = -1;
+ int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
if (!gre)
return;
@@ -356,11 +361,12 @@
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
- grt = &_gr_table[sid];
- grt->base = lsid;
+ grt = &_gr_table[indx];
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
+ lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
- grt->base = sid - 1;
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
@@ -1155,19 +1161,18 @@
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+ pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
- "SID", "PN", "PXM");
+ "SID", "PN");
}
pr_info(
- "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+ "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
- gre->type, gre->nasid, gre->sockid,
- gre->pnode, gre->pxm);
+ gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@
_pnode_to_socket[i] = SOCK_EMPTY;
/* fill in pnode/node/addr conversion list values */
- pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+ pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
@@ -1294,20 +1299,18 @@
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
- _socket_to_node[i] = gre->pxm;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
pr_info(
- "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+ "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
- _socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
- /* check socket -> node values */
+ /* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
- i = sockid - minsock;
-
- if (nid != _socket_to_node[i]) {
- pr_warn(
- "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
- i, sockid, gre->type, _socket_to_node[i], nid);
- _socket_to_node[i] = nid;
- }
+ _socket_to_node[sockid - minsock] = nid;
+ pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+ sockid, apicid, nid);
}
/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049a..01567aa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state". This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
- int xstate_feature_mask)
-{
- xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
- * @xsave_state_ptr: a pointer to a copy of the state that you would
- * like written in to the current task's FPU xsave state. This pointer
- * must not be located in the current tasks's xsave area.
- * Output:
- * address of the state in the xsave area or NULL if the state
- * is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
- void *xstate_feature_src, size_t len)
-{
- struct xregs_state *xsave = ¤t->thread.fpu.state.xsave;
- struct fpu *fpu = ¤t->thread.fpu;
- void *dst;
-
- if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
- WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
- return;
- }
-
- /*
- * Tell the FPU code that we need the FPU state to be in
- * 'fpu' (not in the registers), and that we need it to
- * be stable while we write to it.
- */
- fpu__current_fpstate_write_begin();
-
- /*
- * This method *WILL* *NOT* work for compact-format
- * buffers. If the 'xstate_feature_mask' is unset in
- * xcomp_bv then we may need to move other feature state
- * "up" in the buffer.
- */
- if (xsave->header.xcomp_bv & xstate_feature_mask) {
- WARN_ON_ONCE(1);
- goto out;
- }
-
- /* find the location in the xsave buffer of the desired state */
- dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
- /*
- * Make sure that the pointer being passed in did not
- * come from the xsave buffer itself.
- */
- WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
- /* put the caller-provided data in the location */
- memcpy(dst, xstate_feature_src, len);
-
- /*
- * Mark the xfeature so that the CPU knows there is state
- * in the buffer now.
- */
- fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
- /*
- * We are done writing to the 'fpu'. Reenable preeption
- * and (possibly) move the fpstate back in to the fpregs.
- */
- fpu__current_fpstate_write_end();
-}
-
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory. We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify the PKRU register to set the access
+ * rights for @pkey to @init_val.
*/
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
- struct pkru_state *old_pkru_state;
- struct pkru_state new_pkru_state;
+ u32 old_pkru;
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
u32 new_pkru_bits = 0;
@@ -974,6 +886,15 @@
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
+ /*
+ * For most XSAVE components, this would be an arduous task:
+ * bringing fpstate up to date with fpregs, updating fpstate,
+ * then re-populating fpregs. But, for components that are
+ * never lazily managed, we can just access the fpregs
+ * directly. PKRU is never managed lazily, so we can just
+ * manipulate it directly. Make sure it stays that way.
+ */
+ WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@
/* Shift the bits in to the correct place in PKRU for pkey: */
new_pkru_bits <<= pkey_shift;
- /* Locate old copy of the state in the xsave buffer: */
- old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
+ /* Get old PKRU and mask off any old bits in place: */
+ old_pkru = read_pkru();
+ old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
- /*
- * When state is not in the buffer, it is in the init
- * state, set it manually. Otherwise, copy out the old
- * state.
- */
- if (!old_pkru_state)
- new_pkru_state.pkru = 0;
- else
- new_pkru_state.pkru = old_pkru_state->pkru;
-
- /* Mask off any old bits in place: */
- new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
- /* Set the newly-requested bits: */
- new_pkru_state.pkru |= new_pkru_bits;
-
- /*
- * We could theoretically live without zeroing pkru.pad.
- * The current XSAVE feature state definition says that
- * only bytes 0->3 are used. But we do not want to
- * chance leaking kernel stack out to userspace in case a
- * memcpy() of the whole xsave buffer was done.
- *
- * They're in the same cacheline anyway.
- */
- new_pkru_state.pad = 0;
-
- fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+ /* Write old part along with new part: */
+ write_pkru(old_pkru | new_pkru_bits);
return 0;
}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc..f16c55b 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@
/* Initialize 32bit specific setup functions */
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
- reserve_bios_regions();
}
asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7..54a2372 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@
copy_bootdata(__va(real_mode_data));
x86_early_init_platform_quirks();
- reserve_bios_regions();
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ed16e58..c6dfd80 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1242,7 +1242,7 @@
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- mc146818_set_time(&curr_time);
+ mc146818_get_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc..9f669fd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
- irq_stats(j)->irq_tlb_count);
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b779..0fa60f5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@
x86_init.oem.arch_setup();
- kernel_randomize_memory();
-
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
@@ -1055,6 +1053,12 @@
max_possible_pfn = max_pfn;
+ /*
+ * Define random base addresses for memory sections after max_pfn is
+ * defined and before each memory section base is used.
+ */
+ kernel_randomize_memory();
+
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@@ -1097,6 +1101,8 @@
efi_find_mirror();
}
+ reserve_bios_regions();
+
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@
early_trap_pf_init();
- setup_real_mode();
+ /*
+ * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+ * with the current CR4 value. This may not be necessary, but
+ * auditing all the early-boot CR4 manipulation would be needed to
+ * rule it out.
+ */
+ if (boot_cpu_data.cpuid_level >= 0)
+ /* A CPU has %cr4 if and only if it has CPUID. */
+ mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@@ -1174,13 +1188,6 @@
kasan_init();
- if (boot_cpu_data.cpuid_level >= 0) {
- /* A CPU has %cr4 if and only if it has CPUID */
- mmu_cr4_features = __read_cr4();
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- }
-
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1ef87e8..78b9cb5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
+#include <asm/apic.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
+ /* Inform the TSC deadline clockevent devices about the recalibration */
+ lapic_update_tsc_freq();
+
out:
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31..495c776 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@
*cursor &= 0xfe;
}
/*
- * Similar treatment for VEX3 prefix.
- * TODO: add XOP/EVEX treatment when insn decoder supports them
+ * Similar treatment for VEX3/EVEX prefix.
+ * TODO: add XOP treatment when insn decoder supports it
*/
- if (insn->vex_prefix.nbytes == 3) {
+ if (insn->vex_prefix.nbytes >= 3) {
/*
* vex2: c5 rvvvvLpp (has no b bit)
* vex3/xop: c4/8f rxbmmmmm wvvvvLpp
* evex: 62 rxbR00mm wvvvv1pp zllBVaaa
- * (evex will need setting of both b and x since
- * in non-sib encoding evex.x is 4th bit of MODRM.rm)
- * Setting VEX3.b (setting because it has inverted meaning):
+ * Setting VEX3.b (setting because it has inverted meaning).
+ * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+ * is the 4th bit of MODRM.rm, and needs the same treatment.
+ * For VEX3-encoded insns, VEX3.x value has no effect in
+ * non-SIB encoding, the change is superfluous but harmless.
*/
cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
- *cursor |= 0x20;
+ *cursor |= 0x60;
}
/*
@@ -415,12 +417,10 @@
reg = MODRM_REG(insn); /* Fetch modrm.reg */
reg2 = 0xff; /* Fetch vex.vvvv */
- if (insn->vex_prefix.nbytes == 2)
- reg2 = insn->vex_prefix.bytes[1];
- else if (insn->vex_prefix.nbytes == 3)
+ if (insn->vex_prefix.nbytes)
reg2 = insn->vex_prefix.bytes[2];
/*
- * TODO: add XOP, EXEV vvvv reading.
+ * TODO: add XOP vvvv reading.
*
* vex.vvvv field is in bits 6-3, bits are inverted.
* But in 32-bit mode, high-order bit may be ignored.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3235e0f..afa7bbb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -366,7 +366,8 @@
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
- F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
+ F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+ F(AVX512BW) | F(AVX512VL);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b62c852..23b99f3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1761,9 +1761,10 @@
if (value & MSR_IA32_APICBASE_ENABLE) {
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
static_key_slow_dec_deferred(&apic_hw_disabled);
- } else
+ } else {
static_key_slow_inc(&apic_hw_disabled.key);
- recalculate_apic_map(vcpu->kvm);
+ recalculate_apic_map(vcpu->kvm);
+ }
}
if ((old_value ^ value) & X2APIC_ENABLE) {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3d4cc8cc..d9c7e98 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1207,7 +1207,7 @@
*
* Return true if tlb need be flushed.
*/
-static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
+static bool spte_write_protect(u64 *sptep, bool pt_protect)
{
u64 spte = *sptep;
@@ -1233,12 +1233,12 @@
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_write_protect(kvm, sptep, pt_protect);
+ flush |= spte_write_protect(sptep, pt_protect);
return flush;
}
-static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_clear_dirty(u64 *sptep)
{
u64 spte = *sptep;
@@ -1256,12 +1256,12 @@
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_clear_dirty(kvm, sptep);
+ flush |= spte_clear_dirty(sptep);
return flush;
}
-static bool spte_set_dirty(struct kvm *kvm, u64 *sptep)
+static bool spte_set_dirty(u64 *sptep)
{
u64 spte = *sptep;
@@ -1279,7 +1279,7 @@
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_set_dirty(kvm, sptep);
+ flush |= spte_set_dirty(sptep);
return flush;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a45d858..5cede40 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -422,6 +422,7 @@
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
+ bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
@@ -435,6 +436,8 @@
bool pi_pending;
u16 posted_intr_nv;
+ unsigned long *msr_bitmap;
+
struct hrtimer preemption_timer;
bool preemption_timer_expired;
@@ -924,7 +927,6 @@
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;
@@ -2198,6 +2200,12 @@
new.control) != old.control);
}
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+ vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@
/* Setup TSC multiplier */
if (kvm_has_tsc_control &&
- vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
- vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
- }
+ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+ decache_tsc_multiplier(vmx);
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@
unsigned long *msr_bitmap;
if (is_guest_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_nested;
+ msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
else if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
- if (nested) {
- vmx_msr_bitmap_nested =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_nested)
- goto out5;
- }
-
vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_vmread_bitmap)
goto out6;
@@ -6392,8 +6391,6 @@
memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
- if (nested)
- memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
@@ -6529,9 +6526,6 @@
out7:
free_page((unsigned long)vmx_vmread_bitmap);
out6:
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@
free_page((unsigned long)vmx_io_bitmap_a);
free_page((unsigned long)vmx_vmwrite_bitmap);
free_page((unsigned long)vmx_vmread_bitmap);
- if (nested)
- free_page((unsigned long)vmx_msr_bitmap_nested);
free_kvm_area();
}
@@ -6995,16 +6987,21 @@
return 1;
}
+ if (cpu_has_vmx_msr_bitmap()) {
+ vmx->nested.msr_bitmap =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx->nested.msr_bitmap)
+ goto out_msr_bitmap;
+ }
+
vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
if (!vmx->nested.cached_vmcs12)
- return -ENOMEM;
+ goto out_cached_vmcs12;
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs) {
- kfree(vmx->nested.cached_vmcs12);
- return -ENOMEM;
- }
+ if (!shadow_vmcs)
+ goto out_shadow_vmcs;
/* mark vmcs as shadow */
shadow_vmcs->revision_id |= (1u << 31);
/* init shadow vmcs */
@@ -7024,6 +7021,15 @@
skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
return 1;
+
+out_shadow_vmcs:
+ kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+ return -ENOMEM;
}
/*
@@ -7098,6 +7104,10 @@
vmx->nested.vmxon = false;
free_vpid(vmx->nested.vpid02);
nested_release_vmcs12(vmx);
+ if (vmx->nested.msr_bitmap) {
+ free_page((unsigned long)vmx->nested.msr_bitmap);
+ vmx->nested.msr_bitmap = NULL;
+ }
if (enable_shadow_vmcs)
free_vmcs(vmx->nested.current_shadow_vmcs);
kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@
{
u32 sec_exec_control;
+ /* Postpone execution until vmcs01 is the current VMCS. */
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+ return;
+ }
+
/*
* There is not point to enable virtualize x2apic without enable
* apicv
@@ -9472,8 +9488,10 @@
{
int msr;
struct page *page;
- unsigned long *msr_bitmap;
+ unsigned long *msr_bitmap_l1;
+ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+ /* This shortcut is ok because we support only x2APIC MSRs so far. */
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
@@ -9482,63 +9500,37 @@
WARN_ON(1);
return false;
}
- msr_bitmap = (unsigned long *)kmap(page);
- if (!msr_bitmap) {
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (!msr_bitmap_l1) {
nested_release_page_clean(page);
WARN_ON(1);
return false;
}
+ memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
if (nested_cpu_has_apic_reg_virt(vmcs12))
for (msr = 0x800; msr <= 0x8ff; msr++)
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
msr, MSR_TYPE_R);
- /* TPR is allowed */
- nested_vmx_disable_intercept_for_msr(msr_bitmap,
- vmx_msr_bitmap_nested,
+
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_TASKPRI >> 4),
MSR_TYPE_R | MSR_TYPE_W);
+
if (nested_cpu_has_vid(vmcs12)) {
- /* EOI and self-IPI are allowed */
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_EOI >> 4),
MSR_TYPE_W);
nested_vmx_disable_intercept_for_msr(
- msr_bitmap,
- vmx_msr_bitmap_nested,
+ msr_bitmap_l1, msr_bitmap_l0,
APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
MSR_TYPE_W);
}
- } else {
- /*
- * Enable reading intercept of all the x2apic
- * MSRs. We should not rely on vmcs12 to do any
- * optimizations here, it may have been modified
- * by L1.
- */
- for (msr = 0x800; msr <= 0x8ff; msr++)
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- msr,
- MSR_TYPE_R);
-
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- __vmx_enable_intercept_for_msr(
- vmx_msr_bitmap_nested,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
}
kunmap(page);
nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS) {
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
- /* MSR_BITMAP will be set by following vmx_set_efer. */
- } else
+ exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+ ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+ else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
/*
@@ -10011,6 +10003,8 @@
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
else
vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
if (enable_vpid) {
/*
@@ -10767,6 +10761,14 @@
else
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
+ if (kvm_has_tsc_control)
+ decache_tsc_multiplier(vmx);
+
+ if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+ vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+ vmx_set_virtual_x2apic_mode(vcpu,
+ vcpu->arch.apic_base & X2APIC_ENABLE);
+ }
/* This is needed for same reason as it was needed in prepare_vmcs02 */
vmx->host_rsp = 0;
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d7..8a602a1 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -35,6 +35,7 @@
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
+ pushq %rdi
pushq %rdx
movq %rdi, %rdx # w -> t
@@ -60,6 +61,7 @@
shrq $56, %rax # w = w_tmp >> 56
popq %rdx
+ popq %rdi
ret
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index f7dfeda..121f59c 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
#define has_cpuflag(f) boot_cpu_has(f)
#define get_boot_seed() kaslr_offset()
#endif
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ec21796..4473cb4 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -3,15 +3,17 @@
* included by both the compressed kernel and the regular kernel.
*/
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
unsigned long addr, unsigned long end)
{
addr &= PMD_MASK;
for (; addr < end; addr += PMD_SIZE) {
pmd_t *pmd = pmd_page + pmd_index(addr);
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
+ if (pmd_present(*pmd))
+ continue;
+
+ set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
}
}
@@ -30,13 +32,13 @@
if (pud_present(*pud)) {
pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ ident_pmd_init(info, pmd, addr, next);
continue;
}
pmd = (pmd_t *)info->alloc_pgt_page(info->context);
if (!pmd)
return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ ident_pmd_init(info, pmd, addr, next);
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
}
@@ -44,14 +46,15 @@
}
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
+ unsigned long pstart, unsigned long pend)
{
+ unsigned long addr = pstart + info->offset;
+ unsigned long end = pend + info->offset;
unsigned long next;
int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pgd_t *pgd = pgd_page + pgd_index(addr);
pud_t *pud;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6209289..d28a2d7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -122,8 +122,18 @@
return __va(pfn << PAGE_SHIFT);
}
-/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE)
+/*
+ * By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
+ * With KASLR memory randomization, depending on the machine e820 memory
+ * and the PUD alignment, we may need twice as many pages when KASLR memory
+ * randomization is enabled.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT 6
+#else
+#define INIT_PGD_PAGE_COUNT 12
+#endif
+#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void __init early_alloc_pgt_buf(void)
{
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 26dccd6..ec8654f 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -97,7 +97,7 @@
* add padding if needed (especially for memory hotplug support).
*/
BUG_ON(kaslr_regions[0].base != &page_offset_base);
- memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+ memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
/* Adapt phyiscal memory region size based on available memory */
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4480c06..89d1146 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -254,6 +254,7 @@
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+ size_t rm_size;
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@
if (md->attribute & EFI_MEMORY_RUNTIME)
continue;
+ /*
+ * Nasty quirk: if all sub-1MB memory is used for boot
+ * services, we can get here without having allocated the
+ * real mode trampoline. It's too late to hand boot services
+ * memory back to the memblock allocator, so instead
+ * try to manually allocate the trampoline if needed.
+ *
+ * I've seen this on a Dell XPS 13 9350 with firmware
+ * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+ * grub2-efi on a hard disk. (And no, I don't know why
+ * this happened, but Linux should still try to boot rather
+ * than panicking early.)
+ */
+ rm_size = real_mode_size_needed();
+ if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+ set_real_mode_mem(start, rm_size);
+ start += rm_size;
+ size -= rm_size;
+ }
+
free_bootmem_late(start, size);
}
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 66b2166..23f2f3e 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -187,7 +187,8 @@
void uv_bios_init(void)
{
uv_systab = NULL;
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+ !efi.uv_systab || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
@@ -199,12 +200,14 @@
return;
}
+ /* Starting with UV4 the UV systab size is variable */
if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+ int size = uv_systab->size;
+
iounmap(uv_systab);
- uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+ uv_systab = ioremap(efi.uv_systab, size);
if (!uv_systab) {
- pr_err("UV: UVsystab: ioremap(%d) failed!\n",
- uv_systab->size);
+ pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return;
}
}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f0b5f2d..a3e3ccc 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -87,7 +87,7 @@
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
- .kernel_mapping = true,
+ .offset = __PAGE_OFFSET,
};
unsigned long mstart, mend;
pgd_t *pgd;
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 705e3ff..5db706f1 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
#include <linux/io.h>
+#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
+#include <asm/tlbflush.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
-void __init reserve_real_mode(void)
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
{
- phys_addr_t mem;
- unsigned char *base;
- size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+ void *base = __va(mem);
- /* Has to be under 1M so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
- panic("Cannot allocate trampoline\n");
-
- base = __va(mem);
- memblock_reserve(mem, size);
real_mode_header = (struct real_mode_header *) base;
printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
base, (unsigned long long)mem, size);
}
-void __init setup_real_mode(void)
+void __init reserve_real_mode(void)
+{
+ phys_addr_t mem;
+ size_t size = real_mode_size_needed();
+
+ if (!size)
+ return;
+
+ WARN_ON(slab_is_available());
+
+ /* Has to be under 1M so we can execute real-mode AP code. */
+ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ if (!mem) {
+ pr_info("No sub-1M memory is available for the trampoline\n");
+ return;
+ }
+
+ memblock_reserve(mem, size);
+ set_real_mode_mem(mem, size);
+}
+
+static void __init setup_real_mode(void)
{
u16 real_mode_seg;
const u32 *rel;
@@ -84,7 +98,7 @@
trampoline_header->start = (u64) secondary_startup_64;
trampoline_cr4_features = &trampoline_header->cr4;
- *trampoline_cr4_features = __read_cr4();
+ *trampoline_cr4_features = mmu_cr4_features;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@
* need to mark it executable at do_pre_smp_initcalls() at least,
* thus run it as a early_initcall().
*/
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
{
unsigned char *base = (unsigned char *) real_mode_header;
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@
set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+ if (!real_mode_header)
+ panic("Real mode trampoline was not allocated");
+
+ setup_real_mode();
+ set_real_mode_permissions();
return 0;
}
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a9377be..84d7148 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -439,7 +439,7 @@
config CRYPT_CRC32C_VPMSUM
tristate "CRC32c CRC algorithm (powerpc64)"
- depends on PPC64
+ depends on PPC64 && ALTIVEC
select CRYPTO_HASH
select CRC32
help
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 6226439..7e8ed96 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -24,14 +24,14 @@
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
static const u64 keccakf_rndc[24] = {
- 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
- 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
- 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
- 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
- 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
- 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
- 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
- 0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+ 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
+ 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
+ 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
+ 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
+ 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
+ 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
+ 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
+ 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
};
static const int keccakf_rotc[24] = {
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 8c234dd..80cc7c0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1527,11 +1527,12 @@
{
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+ const u32 STATUS_MASK = 0x80000037;
if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio);
- return readl(mmio->addr.base + offset);
+ return readl(mmio->addr.base + offset) & STATUS_MASK;
}
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1a04af6..6c6519f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3950,6 +3950,7 @@
bool need_put = !!rbd_dev->opts;
ceph_oid_destroy(&rbd_dev->header_oid);
+ ceph_oloc_destroy(&rbd_dev->header_oloc);
rbd_put_client(rbd_dev->rbd_client);
rbd_spec_put(rbd_dev->spec);
@@ -5336,15 +5337,6 @@
}
spec->pool_id = (u64)rc;
- /* The ceph file layout needs to fit pool id in 32 bits */
-
- if (spec->pool_id > (u64)U32_MAX) {
- rbd_warn(NULL, "pool id too large (%llu > %u)",
- (unsigned long long)spec->pool_id, U32_MAX);
- rc = -EIO;
- goto err_out_client;
- }
-
rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
if (!rbd_dev) {
rc = -ENOMEM;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1523e05..93b1aaa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -391,23 +391,17 @@
num_vqs = 1;
vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
- if (!vblk->vqs) {
+ if (!vblk->vqs)
+ return -ENOMEM;
+
+ names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+ callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+ vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+ if (!names || !callbacks || !vqs) {
err = -ENOMEM;
goto out;
}
- names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
- if (!names)
- goto err_names;
-
- callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
- if (!callbacks)
- goto err_callbacks;
-
- vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
- if (!vqs)
- goto err_vqs;
-
for (i = 0; i < num_vqs; i++) {
callbacks[i] = virtblk_done;
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
@@ -417,7 +411,7 @@
/* Discover virtqueues and write information to configuration. */
err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
if (err)
- goto err_find_vqs;
+ goto out;
for (i = 0; i < num_vqs; i++) {
spin_lock_init(&vblk->vqs[i].lock);
@@ -425,16 +419,12 @@
}
vblk->num_vqs = num_vqs;
- err_find_vqs:
+out:
kfree(vqs);
- err_vqs:
kfree(callbacks);
- err_callbacks:
kfree(names);
- err_names:
if (err)
kfree(vblk->vqs);
- out:
return err;
}
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 28bce3f..5770054 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -8,6 +8,9 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
+#define pr_fmt(fmt) "arm_arch_timer: " fmt
+
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/device.h>
@@ -370,16 +373,33 @@
arch_timer_ppi[PHYS_NONSECURE_PPI]);
}
+static u32 check_ppi_trigger(int irq)
+{
+ u32 flags = irq_get_trigger_type(irq);
+
+ if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
+ pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
+ pr_warn("WARNING: Please fix your firmware\n");
+ flags = IRQF_TRIGGER_LOW;
+ }
+
+ return flags;
+}
+
static int arch_timer_starting_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+ u32 flags;
__arch_timer_setup(ARCH_CP15_TIMER, clk);
- enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
+ flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
+ enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
- if (arch_timer_has_nonsecure_ppi())
- enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
+ if (arch_timer_has_nonsecure_ppi()) {
+ flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]);
+ enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags);
+ }
arch_counter_set_user_access();
if (evtstrm_enable)
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 87796e0..d3ffde8 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -145,11 +145,30 @@
/* Use following macros for conversions between pstate_id and index */
static inline int idx_to_pstate(unsigned int i)
{
+ if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
+ pr_warn_once("index %u is out of bound\n", i);
+ return powernv_freqs[powernv_pstate_info.nominal].driver_data;
+ }
+
return powernv_freqs[i].driver_data;
}
static inline unsigned int pstate_to_idx(int pstate)
{
+ int min = powernv_freqs[powernv_pstate_info.min].driver_data;
+ int max = powernv_freqs[powernv_pstate_info.max].driver_data;
+
+ if (min > 0) {
+ if (unlikely((pstate < max) || (pstate > min))) {
+ pr_warn_once("pstate %d is out of bound\n", pstate);
+ return powernv_pstate_info.nominal;
+ }
+ } else {
+ if (unlikely((pstate > max) || (pstate < min))) {
+ pr_warn_once("pstate %d is out of bound\n", pstate);
+ return powernv_pstate_info.nominal;
+ }
+ }
/*
* abs() is deliberately used so that is works with
* both monotonically increasing and decreasing
@@ -593,7 +612,7 @@
} else {
gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
gpstates->highest_lpstate_idx,
- freq_data.pstate_id);
+ gpstates->last_lpstate_idx);
}
/*
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index ea8189f..6dc5971 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -441,6 +441,9 @@
OP_ALG_AAI_CTR_MOD128);
const bool is_rfc3686 = alg->caam.rfc3686;
+ if (!ctx->authsize)
+ return 0;
+
/* NULL encryption / decryption */
if (!ctx->enckeylen)
return aead_null_set_sh_desc(aead);
@@ -614,7 +617,7 @@
keys_fit_inline = true;
/* aead_givencrypt shared descriptor */
- desc = ctx->sh_desc_givenc;
+ desc = ctx->sh_desc_enc;
/* Note: Context registers are saved. */
init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
@@ -645,13 +648,13 @@
append_operation(desc, ctx->class2_alg_type |
OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
- /* ivsize + cryptlen = seqoutlen - authsize */
- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
/* Read and write assoclen bytes */
append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+ /* ivsize + cryptlen = seqoutlen - authsize */
+ append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+
/* Skip assoc data */
append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
@@ -697,7 +700,7 @@
ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
desc_bytes(desc),
DMA_TO_DEVICE);
- if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
+ if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
dev_err(jrdev, "unable to map shared descriptor\n");
return -ENOMEM;
}
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index f1ecc8d..36365b3 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1898,6 +1898,7 @@
template->name);
snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
template->driver_name);
+ t_alg->ahash_alg.setkey = NULL;
}
alg->cra_module = THIS_MODULE;
alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 4fb2eb7..ce0067b 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -552,9 +552,9 @@
/* Knight's Landing Support */
/*
* KNL's memory channels are swizzled between memory controllers.
- * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
+ * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2
*/
-#define knl_channel_remap(channel) ((channel + 3) % 6)
+#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)
/* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
#define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840
@@ -1286,7 +1286,7 @@
mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
- return knl_channel_remap(mc*3 + chan);
+ return knl_channel_remap(mc, chan);
}
/*
@@ -2997,8 +2997,15 @@
} else {
char A = *("A");
- channel = knl_channel_remap(channel);
+ /*
+ * Reported channel is in range 0-2, so we can't map it
+ * back to mc. To figure out mc we check which machine
+ * check bank reported this error:
+ * bank15 means mc0 and bank16 means mc1.
+ */
+ channel = knl_channel_remap(m->bank == 16, channel);
channel_mask = 1 << channel;
+
snprintf(msg, sizeof(msg),
"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
overflow ? " OVERFLOW" : "",
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index c99c24b..9ae6c11 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/efi.h>
+#include <linux/vmalloc.h>
#define NO_FURTHER_WRITE_ACTION -1
@@ -108,14 +109,15 @@
int ret;
void *cap_hdr_temp;
- cap_hdr_temp = kmap(cap_info->pages[0]);
+ cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+ VM_MAP, PAGE_KERNEL);
if (!cap_hdr_temp) {
- pr_debug("%s: kmap() failed\n", __func__);
+ pr_debug("%s: vmap() failed\n", __func__);
return -EFAULT;
}
ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
- kunmap(cap_info->pages[0]);
+ vunmap(cap_hdr_temp);
if (ret) {
pr_err("%s: efi_capsule_update() failed\n", __func__);
return ret;
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 53b9fd2..6eedff4 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -190,9 +190,9 @@
* map the capsule described by @capsule with its data in @pages and
* send it to the firmware via the UpdateCapsule() runtime service.
*
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
* must be at the start of the first data page.
*
* Even though this function will validate that the firmware supports
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index ff63b88..5cc7052 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -305,7 +305,7 @@
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
char *table = NULL;
- int size, i;
+ int size;
if (adev->pp_enabled)
size = amdgpu_dpm_get_pp_table(adev, &table);
@@ -315,10 +315,7 @@
if (size >= PAGE_SIZE)
size = PAGE_SIZE - 1;
- for (i = 0; i < size; i++) {
- sprintf(buf + i, "%02x", table[i]);
- }
- sprintf(buf + i, "\n");
+ memcpy(buf, table, size);
return size;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b7742e6..9b61c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -335,7 +335,7 @@
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
@@ -368,7 +368,7 @@
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index e2f0e5d..a5c94b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -5779,6 +5779,7 @@
break;
case CHIP_KAVERI:
case CHIP_KABINI:
+ case CHIP_MULLINS:
default: BUG();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index bff8668..b818461 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -270,7 +270,8 @@
static const u32 golden_settings_polaris11_a11[] =
{
- mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+ mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -279,7 +280,7 @@
mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
- mmSQ_CONFIG, 0x07f80000, 0x07180000,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
@@ -301,8 +302,8 @@
static const u32 golden_settings_polaris10_a11[] =
{
mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
- mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
- mmCB_HW_CONTROL_2, 0, 0x0f000000,
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -409,6 +410,7 @@
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -505,8 +507,10 @@
mmGB_GPU_ID, 0x0000000f, 0x00000000,
mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index d24a82b..0b0f086 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -144,6 +144,7 @@
break;
case CHIP_KAVERI:
case CHIP_KABINI:
+ case CHIP_MULLINS:
return 0;
default: BUG();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 717359d..2aee2c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -103,6 +103,11 @@
mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
};
+static const u32 golden_settings_stoney_common[] =
+{
+ mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
+ mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
+};
static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
@@ -142,6 +147,9 @@
amdgpu_program_register_sequence(adev,
stoney_mgcg_cgcg_init,
(const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
+ amdgpu_program_register_sequence(adev,
+ golden_settings_stoney_common,
+ (const u32)ARRAY_SIZE(golden_settings_stoney_common));
break;
default:
break;
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 80446e2..76bcb43 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -185,14 +185,23 @@
goto out;
}
+ /*
+ * cirrus_modeset_init() is initializing/registering the emulated fbdev
+ * and DRM internals can access/test some of the fields in
+ * mode_config->funcs as part of the fbdev registration process.
+ * Make sure dev->mode_config.funcs is properly set to avoid
+ * dereferencing a NULL pointer.
+ * FIXME: mode_config.funcs assignment should probably be done in
+ * cirrus_modeset_init() (that's a common pattern seen in other DRM
+ * drivers).
+ */
+ dev->mode_config.funcs = &cirrus_mode_funcs;
r = cirrus_modeset_init(cdev);
if (r) {
dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r);
goto out;
}
- dev->mode_config.funcs = (void *)&cirrus_mode_funcs;
-
return 0;
out:
cirrus_driver_unload(dev);
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f1d9f05..b1dbb60 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1121,16 +1121,14 @@
struct drm_connector *connector;
int ret;
- mutex_lock(&dev->mode_config.mutex);
-
- drm_for_each_connector(connector, dev) {
+ /* FIXME: taking the mode config mutex ends up in a clash with
+ * fbcon/backlight registration */
+ list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
ret = drm_connector_register(connector);
if (ret)
goto err;
}
- mutex_unlock(&dev->mode_config.mutex);
-
return 0;
err:
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7df26d4..637a0aa 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -74,6 +74,8 @@
#define EDID_QUIRK_FORCE_8BPC (1 << 8)
/* Force 12bpc */
#define EDID_QUIRK_FORCE_12BPC (1 << 9)
+/* Force 6bpc */
+#define EDID_QUIRK_FORCE_6BPC (1 << 10)
struct detailed_mode_closure {
struct drm_connector *connector;
@@ -100,6 +102,9 @@
/* Unknown Acer */
{ "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+ /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
+ { "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+
/* Belinea 10 15 55 */
{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
@@ -3862,6 +3867,20 @@
/* HDMI deep color modes supported? Assign to info, if so */
drm_assign_hdmi_deep_color_info(edid, info, connector);
+ /*
+ * Digital sink with "DFP 1.x compliant TMDS" according to EDID 1.3?
+ *
+ * For such displays, the DFP spec 1.0, section 3.10 "EDID support"
+ * tells us to assume 8 bpc color depth if the EDID doesn't have
+ * extensions which tell otherwise.
+ */
+ if ((info->bpc == 0) && (edid->revision < 4) &&
+ (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) {
+ info->bpc = 8;
+ DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n",
+ connector->name, info->bpc);
+ }
+
/* Only defined for 1.4 with digital displays */
if (edid->revision < 4)
return;
@@ -4082,6 +4101,9 @@
drm_add_display_info(edid, &connector->display_info, connector);
+ if (quirks & EDID_QUIRK_FORCE_6BPC)
+ connector->display_info.bpc = 6;
+
if (quirks & EDID_QUIRK_FORCE_8BPC)
connector->display_info.bpc = 8;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c457eed..dcf93b3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -5691,15 +5691,7 @@
static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
{
- unsigned int i;
-
- for (i = 0; i < 15; i++) {
- if (skl_cdclk_pcu_ready(dev_priv))
- return true;
- udelay(10);
- }
-
- return false;
+ return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0;
}
static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco)
@@ -12114,21 +12106,11 @@
pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
}
- /* Clamp bpp to default limit on screens without EDID 1.4 */
- if (connector->base.display_info.bpc == 0) {
- int type = connector->base.connector_type;
- int clamp_bpp = 24;
-
- /* Fall back to 18 bpp when DP sink capability is unknown. */
- if (type == DRM_MODE_CONNECTOR_DisplayPort ||
- type == DRM_MODE_CONNECTOR_eDP)
- clamp_bpp = 18;
-
- if (bpp > clamp_bpp) {
- DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
- bpp, clamp_bpp);
- pipe_config->pipe_bpp = clamp_bpp;
- }
+ /* Clamp to 24 bpp (8 bpc) on screens without EDID 1.4 */
+ if (connector->base.display_info.bpc == 0 && bpp > 24) {
+ DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
+ bpp);
+ pipe_config->pipe_bpp = 24;
}
}
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 86b00c6..3e3632c 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -782,7 +782,7 @@
struct intel_fbdev *ifbdev = dev_priv->fbdev;
struct fb_info *info;
- if (!ifbdev)
+ if (!ifbdev || !ifbdev->fb)
return;
info = ifbdev->helper.fbdev;
@@ -827,31 +827,28 @@
void intel_fbdev_output_poll_changed(struct drm_device *dev)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- if (dev_priv->fbdev)
- drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper);
+ struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
+
+ if (ifbdev && ifbdev->fb)
+ drm_fb_helper_hotplug_event(&ifbdev->helper);
}
void intel_fbdev_restore_mode(struct drm_device *dev)
{
- int ret;
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct intel_fbdev *ifbdev = dev_priv->fbdev;
- struct drm_fb_helper *fb_helper;
+ struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
if (!ifbdev)
return;
intel_fbdev_sync(ifbdev);
+ if (!ifbdev->fb)
+ return;
- fb_helper = &ifbdev->helper;
-
- ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
- if (ret) {
+ if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) {
DRM_DEBUG("failed to restore crtc mode\n");
} else {
- mutex_lock(&fb_helper->dev->struct_mutex);
+ mutex_lock(&dev->struct_mutex);
intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT);
- mutex_unlock(&fb_helper->dev->struct_mutex);
+ mutex_unlock(&dev->struct_mutex);
}
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f4f3fcc..97ba6c8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4892,7 +4892,8 @@
else
gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
dev_priv->rps.last_adj = 0;
- I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_sanitize_rps_pm_mask(dev_priv, ~0));
}
mutex_unlock(&dev_priv->rps.hw_lock);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 528bdef..6190035 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1151,7 +1151,7 @@
if (ret)
goto out;
- ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+ ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem);
out:
ttm_bo_mem_put(bo, &tmp_mem);
return ret;
@@ -1179,7 +1179,7 @@
if (ret)
return ret;
- ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+ ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ffdad81..0c00e19 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -346,7 +346,7 @@
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
@@ -379,7 +379,7 @@
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 4de3ff0..e03004f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -125,6 +125,7 @@
/* Link drm_bridge to encoder */
bridge->encoder = encoder;
+ encoder->bridge = bridge;
ret = drm_bridge_attach(rcdu->ddev, bridge);
if (ret) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4054d80..42c074a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -354,7 +354,8 @@
if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
- ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem);
+ ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu,
+ mem);
else if (bdev->driver->move)
ret = bdev->driver->move(bo, evict, interruptible,
no_wait_gpu, mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2df602a..f157a9e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -45,7 +45,7 @@
}
int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
- bool evict,
+ bool evict, bool interruptible,
bool no_wait_gpu, struct ttm_mem_reg *new_mem)
{
struct ttm_tt *ttm = bo->ttm;
@@ -53,6 +53,14 @@
int ret;
if (old_mem->mem_type != TTM_PL_SYSTEM) {
+ ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+
+ if (unlikely(ret != 0)) {
+ if (ret != -ERESTARTSYS)
+ pr_err("Failed to expire sync object before unbinding TTM\n");
+ return ret;
+ }
+
ttm_tt_unbind(ttm);
ttm_bo_free_old_node(bo);
ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 08a1e2f..00c8a08 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -68,7 +68,8 @@
if (!iovad)
return;
- put_iova_domain(iovad);
+ if (iovad->granule)
+ put_iova_domain(iovad);
kfree(iovad);
domain->iova_cookie = NULL;
}
@@ -151,12 +152,15 @@
}
}
-static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
dma_addr_t dma_limit)
{
+ struct iova_domain *iovad = domain->iova_cookie;
unsigned long shift = iova_shift(iovad);
unsigned long length = iova_align(iovad, size) >> shift;
+ if (domain->geometry.force_aperture)
+ dma_limit = min(dma_limit, domain->geometry.aperture_end);
/*
* Enforce size-alignment to be safe - there could perhaps be an
* attribute to control this per-device, or at least per-domain...
@@ -314,7 +318,7 @@
if (!pages)
return NULL;
- iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+ iova = __alloc_iova(domain, size, dev->coherent_dma_mask);
if (!iova)
goto out_free_pages;
@@ -386,7 +390,7 @@
phys_addr_t phys = page_to_phys(page) + offset;
size_t iova_off = iova_offset(iovad, phys);
size_t len = iova_align(iovad, size + iova_off);
- struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+ struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev));
if (!iova)
return DMA_ERROR_CODE;
@@ -538,7 +542,7 @@
prev = s;
}
- iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+ iova = __alloc_iova(domain, iova_len, dma_get_mask(dev));
if (!iova)
goto out_restore_sg;
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 9ed0a84..3dab13b 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -55,19 +55,19 @@
bool enable_4GB;
};
-static int compare_of(struct device *dev, void *data)
+static inline int compare_of(struct device *dev, void *data)
{
return dev->of_node == data;
}
-static int mtk_iommu_bind(struct device *dev)
+static inline int mtk_iommu_bind(struct device *dev)
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
return component_bind_all(dev, &data->smi_imu);
}
-static void mtk_iommu_unbind(struct device *dev)
+static inline void mtk_iommu_unbind(struct device *dev)
{
struct mtk_iommu_data *data = dev_get_drvdata(dev);
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 4387ccb..7410c6d 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -69,5 +69,6 @@
OBJCOPYFLAGS_lkdtm_rodata_objcopy.o := \
--set-section-flags .text=alloc,readonly \
--rename-section .text=.rodata
-$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o
+targets += lkdtm_rodata.o lkdtm_rodata_objcopy.o
+$(obj)/lkdtm_rodata_objcopy.o: $(obj)/lkdtm_rodata.o FORCE
$(call if_changed,objcopy)
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index bdee9a0..c466ee2 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -90,8 +90,7 @@
*/
mutex_lock(&afu->contexts_lock);
idr_preload(GFP_KERNEL);
- i = idr_alloc(&ctx->afu->contexts_idr, ctx,
- ctx->afu->adapter->native->sl_ops->min_pe,
+ i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
ctx->afu->num_procs, GFP_NOWAIT);
idr_preload_end();
mutex_unlock(&afu->contexts_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index de09053..344a0ff 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -561,7 +561,6 @@
u64 (*timebase_read)(struct cxl *adapter);
int capi_mode;
bool needs_reset_before_disable;
- int min_pe;
};
struct cxl_native {
@@ -603,6 +602,7 @@
struct bin_attribute cxl_attr;
int adapter_num;
int user_irqs;
+ int min_pe;
u64 ps_size;
u16 psl_rev;
u16 base_image;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 3bcdaee..e606fdc 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -924,7 +924,7 @@
return fail_psl_irq(afu, &irq_info);
}
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
{
u64 dsisr;
int timeout = 1000;
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index d152e2d..6f0c4ac 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -379,7 +379,7 @@
static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
{
- u64 psl_dsnctl;
+ u64 psl_dsnctl, psl_fircntl;
u64 chipid;
u64 capp_unit_id;
int rc;
@@ -398,8 +398,11 @@
cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
/* snoop write mask */
cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
- /* set fir_accum */
- cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+ /* set fir_cntl to recommended value for production env */
+ psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+ psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+ psl_fircntl |= 0x1ULL; /* ce_thresh */
+ cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
/* for debugging with trace arrays */
cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
@@ -1521,14 +1524,15 @@
.write_timebase_ctrl = write_timebase_ctrl_xsl,
.timebase_read = timebase_read_xsl,
.capi_mode = OPAL_PHB_CAPI_MODE_DMA,
- .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
};
static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
{
if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+ /* Mellanox CX-4 */
dev_info(&adapter->dev, "Device uses an XSL\n");
adapter->native->sl_ops = &xsl_ops;
+ adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
} else {
dev_info(&adapter->dev, "Device uses a PSL\n");
adapter->native->sl_ops = &psl_ops;
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index dee8def..7ada5f1 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -221,7 +221,7 @@
/* Setup the PHB using arch provided callback */
phb->ops = &cxl_pcie_pci_ops;
phb->cfg_addr = NULL;
- phb->cfg_data = 0;
+ phb->cfg_data = NULL;
phb->private_data = afu;
phb->controller_ops = cxl_pci_controller_ops;
diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c
index 5a3fd76..5525a20 100644
--- a/drivers/misc/lkdtm_usercopy.c
+++ b/drivers/misc/lkdtm_usercopy.c
@@ -49,7 +49,7 @@
/* This is a pointer to outside our current stack frame. */
if (bad_frame) {
- bad_stack = do_usercopy_stack_callee((uintptr_t)bad_stack);
+ bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack);
} else {
/* Put start address just inside stack. */
bad_stack = task_stack_page(current) + THREAD_SIZE;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1f276fa..217e8da 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -152,7 +152,7 @@
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
"0 for slow, 1 for fast");
module_param(ad_select, charp, 0);
-MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; "
+MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
"0 for stable (default), 1 for bandwidth, "
"2 for count");
module_param(min_links, int, 0);
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index 8f12bdd..a0b453e 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -258,7 +258,7 @@
* BCM5325 and BCM5365 share most definitions below
*/
#define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n))
-#define ARLTBL_MAC_MASK 0xffffffffffff
+#define ARLTBL_MAC_MASK 0xffffffffffffULL
#define ARLTBL_VID_S 48
#define ARLTBL_VID_MASK_25 0xff
#define ARLTBL_VID_MASK 0xfff
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d36aedd..d1d9d3c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3187,6 +3187,7 @@
return err;
}
+#ifdef CONFIG_NET_DSA_HWMON
static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
int reg)
{
@@ -3212,6 +3213,7 @@
return ret;
}
+#endif
static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
{
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 37a0f46..18bb955 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -793,6 +793,8 @@
netdev_err(ndev, "Could not connect to PHY\n");
return -ENODEV;
}
+#else
+ return -ENODEV;
#endif
}
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 4bff0f3..b0da969 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -771,8 +771,10 @@
priv->dev = dev;
priv->regs = devm_ioremap_resource(dev, &res_regs);
- if (IS_ERR(priv->regs))
- return PTR_ERR(priv->regs);
+ if (IS_ERR(priv->regs)) {
+ err = PTR_ERR(priv->regs);
+ goto out_put_node;
+ }
dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ff300f7..6592612 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12552,10 +12552,6 @@
info->data = TG3_RSS_MAX_NUM_QS;
}
- /* The first interrupt vector only
- * handles link interrupts.
- */
- info->data -= 1;
return 0;
default:
@@ -14014,6 +14010,7 @@
}
if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+ (!ec->rx_coalesce_usecs) ||
(ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
(ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
(ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 36893d8..b6fcf10 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -403,11 +403,11 @@
#define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII 0x00000004
#define MACB_CAPS_NO_GIGABIT_HALF 0x00000008
#define MACB_CAPS_USRIO_DISABLED 0x00000010
+#define MACB_CAPS_JUMBO 0x00000020
#define MACB_CAPS_FIFO_MODE 0x10000000
#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
#define MACB_CAPS_SG_DISABLED 0x40000000
#define MACB_CAPS_MACB_IS_GEM 0x80000000
-#define MACB_CAPS_JUMBO 0x00000010
/* Bit manipulation macros */
#define MACB_BIT(name) \
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 1471e16..f45385f 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1299,6 +1299,7 @@
dm9000_open(struct net_device *dev)
{
struct board_info *db = netdev_priv(dev);
+ unsigned int irq_flags = irq_get_trigger_type(dev->irq);
if (netif_msg_ifup(db))
dev_dbg(db->dev, "enabling %s\n", dev->name);
@@ -1306,9 +1307,11 @@
/* If there is no IRQ type specified, tell the user that this is a
* problem
*/
- if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
+ if (irq_flags == IRQF_TRIGGER_NONE)
dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
+ irq_flags |= IRQF_SHARED;
+
/* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
mdelay(1); /* delay needs by DM9000B */
@@ -1316,8 +1319,7 @@
/* Initialize DM9000 board */
dm9000_init_dm9000(dev);
- if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
- dev->name, dev))
+ if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev))
return -EAGAIN;
/* Now that we have an interrupt handler hooked up we can unmask
* our interrupts
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 1235c7f..1e1eb92 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -17,7 +17,7 @@
{"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)},
{"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)},
{"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)},
- {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
+ {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
{"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)},
{"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)},
{"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)},
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 7fd4d54..6b03c85 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -2032,7 +2032,8 @@
| FLAG2_DISABLE_ASPM_L0S
| FLAG2_DISABLE_ASPM_L1
| FLAG2_NO_DISABLE_RX
- | FLAG2_DMA_BURST,
+ | FLAG2_DMA_BURST
+ | FLAG2_CHECK_SYSTIM_OVERFLOW,
.pba = 32,
.max_hw_frame_size = DEFAULT_JUMBO,
.get_variants = e1000_get_variants_82571,
@@ -2053,7 +2054,8 @@
| FLAG_HAS_CTRLEXT_ON_LOAD,
.flags2 = FLAG2_DISABLE_ASPM_L0S
| FLAG2_DISABLE_ASPM_L1
- | FLAG2_NO_DISABLE_RX,
+ | FLAG2_NO_DISABLE_RX
+ | FLAG2_CHECK_SYSTIM_OVERFLOW,
.pba = 32,
.max_hw_frame_size = DEFAULT_JUMBO,
.get_variants = e1000_get_variants_82571,
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index ef96cd1..879cca4 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -452,6 +452,7 @@
#define FLAG2_PCIM2PCI_ARBITER_WA BIT(11)
#define FLAG2_DFLT_CRC_STRIPPING BIT(12)
#define FLAG2_CHECK_RX_HWTSTAMP BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14)
#define E1000_RX_DESC_PS(R, i) \
(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 3e11322..f3aaca7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -5885,7 +5885,8 @@
| FLAG_HAS_JUMBO_FRAMES
| FLAG_APME_IN_WUC,
.flags2 = FLAG2_HAS_PHY_STATS
- | FLAG2_HAS_EEE,
+ | FLAG2_HAS_EEE
+ | FLAG2_CHECK_SYSTIM_OVERFLOW,
.pba = 26,
.max_hw_frame_size = 9022,
.get_variants = e1000_get_variants_ich8lan,
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 02f4439..7017281 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4303,6 +4303,42 @@
}
/**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: cycle_t value read, sanitized and returned
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+{
+ u64 time_delta, rem, temp;
+ cycle_t systim_next;
+ u32 incvalue;
+ int i;
+
+ incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+ for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+ /* latch SYSTIMH on read of SYSTIML */
+ systim_next = (cycle_t)er32(SYSTIML);
+ systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+ time_delta = systim_next - systim;
+ temp = time_delta;
+ /* VMWare users have seen incvalue of zero, don't div / 0 */
+ rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+ systim = systim_next;
+
+ if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+ break;
+ }
+
+ return systim;
+}
+
+/**
* e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
* @cc: cyclecounter structure
**/
@@ -4312,7 +4348,7 @@
cc);
struct e1000_hw *hw = &adapter->hw;
u32 systimel, systimeh;
- cycle_t systim, systim_next;
+ cycle_t systim;
/* SYSTIMH latching upon SYSTIML read does not work well.
* This means that if SYSTIML overflows after we read it but before
* we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4335,33 +4371,9 @@
systim = (cycle_t)systimel;
systim |= (cycle_t)systimeh << 32;
- if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
- u64 time_delta, rem, temp;
- u32 incvalue;
- int i;
+ if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+ systim = e1000e_sanitize_systim(hw, systim);
- /* errata for 82574/82583 possible bad bits read from SYSTIMH/L
- * check to see that the time is incrementing at a reasonable
- * rate and is a multiple of incvalue
- */
- incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
- for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
- /* latch SYSTIMH on read of SYSTIML */
- systim_next = (cycle_t)er32(SYSTIML);
- systim_next |= (cycle_t)er32(SYSTIMH) << 32;
-
- time_delta = systim_next - systim;
- temp = time_delta;
- /* VMWare users have seen incvalue of zero, don't div / 0 */
- rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
-
- systim = systim_next;
-
- if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
- (rem == 0))
- break;
- }
- }
return systim;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 81c99e1..c6ac7a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4554,23 +4554,38 @@
**/
static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
{
+ int i, tc_unused = 0;
u8 num_tc = 0;
- int i;
+ u8 ret = 0;
/* Scan the ETS Config Priority Table to find
* traffic class enabled for a given priority
- * and use the traffic class index to get the
- * number of traffic classes enabled
+ * and create a bitmask of enabled TCs
*/
- for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
- if (dcbcfg->etscfg.prioritytable[i] > num_tc)
- num_tc = dcbcfg->etscfg.prioritytable[i];
+ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+ num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
+
+ /* Now scan the bitmask to check for
+ * contiguous TCs starting with TC0
+ */
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (num_tc & BIT(i)) {
+ if (!tc_unused) {
+ ret++;
+ } else {
+ pr_err("Non-contiguous TC - Disabling DCB\n");
+ return 1;
+ }
+ } else {
+ tc_unused = 1;
+ }
}
- /* Traffic class index starts from zero so
- * increment to return the actual count
- */
- return num_tc + 1;
+ /* There is always at least TC0 */
+ if (!ret)
+ ret = 1;
+
+ return ret;
}
/**
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e61b647..336c103 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -744,7 +744,8 @@
}
}
- shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
+ shhwtstamps.hwtstamp =
+ ktime_add_ns(shhwtstamps.hwtstamp, adjust);
skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
dev_kfree_skb_any(adapter->ptp_tx_skb);
@@ -767,13 +768,32 @@
struct sk_buff *skb)
{
__le64 *regval = (__le64 *)va;
+ struct igb_adapter *adapter = q_vector->adapter;
+ int adjust = 0;
/* The timestamp is recorded in little endian format.
* DWORD: 0 1 2 3
* Field: Reserved Reserved SYSTIML SYSTIMH
*/
- igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+ igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
le64_to_cpu(regval[1]));
+
+ /* adjust timestamp for the RX latency based on link speed */
+ if (adapter->hw.mac.type == e1000_i210) {
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adjust = IGB_I210_RX_LATENCY_10;
+ break;
+ case SPEED_100:
+ adjust = IGB_I210_RX_LATENCY_100;
+ break;
+ case SPEED_1000:
+ adjust = IGB_I210_RX_LATENCY_1000;
+ break;
+ }
+ }
+ skb_hwtstamps(skb)->hwtstamp =
+ ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
}
/**
@@ -825,7 +845,7 @@
}
}
skb_hwtstamps(skb)->hwtstamp =
- ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+ ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
/* Update the last_rx_timestamp timer in order to enable watchdog check
* for error case of latched timestamp on a dropped packet.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5418c69a..b4f0374 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4100,6 +4100,8 @@
struct ixgbe_hw *hw = &adapter->hw;
u32 vlnctrl, i;
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
switch (hw->mac.type) {
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
@@ -4112,8 +4114,7 @@
/* fall through */
case ixgbe_mac_82598EB:
/* legacy case, we can just disable VLAN filtering */
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
- vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+ vlnctrl &= ~IXGBE_VLNCTRL_VFE;
IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
return;
}
@@ -4125,6 +4126,10 @@
/* Set flag so we don't redo unnecessary work */
adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
+ /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+ vlnctrl |= IXGBE_VLNCTRL_VFE;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
/* Add PF to all active pools */
for (i = IXGBE_VLVF_ENTRIES; --i;) {
u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
@@ -4191,6 +4196,11 @@
struct ixgbe_hw *hw = &adapter->hw;
u32 vlnctrl, i;
+ /* Set VLAN filtering to enabled */
+ vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+ vlnctrl |= IXGBE_VLNCTRL_VFE;
+ IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
switch (hw->mac.type) {
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
@@ -4202,10 +4212,6 @@
break;
/* fall through */
case ixgbe_mac_82598EB:
- vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
- vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
- vlnctrl |= IXGBE_VLNCTRL_VFE;
- IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
return;
}
@@ -8390,12 +8396,14 @@
struct tcf_exts *exts, u64 *action, u8 *queue)
{
const struct tc_action *a;
+ LIST_HEAD(actions);
int err;
if (tc_no_actions(exts))
return -EINVAL;
- tc_for_each_action(a, exts) {
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
/* Drop action */
if (is_tcf_gact_shot(a)) {
@@ -9517,6 +9525,7 @@
/* copy netdev features into list of user selectable features */
netdev->hw_features |= netdev->features |
+ NETIF_F_HW_VLAN_CTAG_FILTER |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_RXALL |
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b57ae3a..f160954 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -245,12 +245,16 @@
case PHY_INTERFACE_MODE_MII:
ge_mode = 1;
break;
- case PHY_INTERFACE_MODE_RMII:
+ case PHY_INTERFACE_MODE_REVMII:
ge_mode = 2;
break;
+ case PHY_INTERFACE_MODE_RMII:
+ if (!mac->id)
+ goto err_phy;
+ ge_mode = 3;
+ break;
default:
- dev_err(eth->dev, "invalid phy_mode\n");
- return -1;
+ goto err_phy;
}
/* put the gmac into the right mode */
@@ -263,13 +267,25 @@
mac->phy_dev->autoneg = AUTONEG_ENABLE;
mac->phy_dev->speed = 0;
mac->phy_dev->duplex = 0;
+
+ if (of_phy_is_fixed_link(mac->of_node))
+ mac->phy_dev->supported |=
+ SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
SUPPORTED_Asym_Pause;
mac->phy_dev->advertising = mac->phy_dev->supported |
ADVERTISED_Autoneg;
phy_start_aneg(mac->phy_dev);
+ of_node_put(np);
+
return 0;
+
+err_phy:
+ of_node_put(np);
+ dev_err(eth->dev, "invalid phy_mode\n");
+ return -EINVAL;
}
static int mtk_mdio_init(struct mtk_eth *eth)
@@ -542,15 +558,15 @@
return &ring->buf[idx];
}
-static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
{
if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
- dma_unmap_single(dev,
+ dma_unmap_single(eth->dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
- dma_unmap_page(dev,
+ dma_unmap_page(eth->dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
@@ -595,9 +611,9 @@
if (skb_vlan_tag_present(skb))
txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
- mapped_addr = dma_map_single(&dev->dev, skb->data,
+ mapped_addr = dma_map_single(eth->dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+ if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
return -ENOMEM;
WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -623,10 +639,10 @@
n_desc++;
frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
- mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+ mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
frag_map_size,
DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+ if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
goto err_dma;
if (i == nr_frags - 1 &&
@@ -679,7 +695,7 @@
tx_buf = mtk_desc_to_tx_buf(ring, itxd);
/* unmap dma */
- mtk_tx_unmap(&dev->dev, tx_buf);
+ mtk_tx_unmap(eth, tx_buf);
itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
@@ -836,11 +852,11 @@
netdev->stats.rx_dropped++;
goto release_desc;
}
- dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+ dma_addr = dma_map_single(eth->dev,
new_data + NET_SKB_PAD,
ring->buf_size,
DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+ if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
skb_free_frag(new_data);
netdev->stats.rx_dropped++;
goto release_desc;
@@ -855,7 +871,7 @@
}
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- dma_unmap_single(&netdev->dev, trxd.rxd1,
+ dma_unmap_single(eth->dev, trxd.rxd1,
ring->buf_size, DMA_FROM_DEVICE);
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
skb->dev = netdev;
@@ -937,7 +953,7 @@
done[mac]++;
budget--;
}
- mtk_tx_unmap(eth->dev, tx_buf);
+ mtk_tx_unmap(eth, tx_buf);
ring->last_free = desc;
atomic_inc(&ring->free_count);
@@ -1092,7 +1108,7 @@
if (ring->buf) {
for (i = 0; i < MTK_DMA_SIZE; i++)
- mtk_tx_unmap(eth->dev, &ring->buf[i]);
+ mtk_tx_unmap(eth, &ring->buf[i]);
kfree(ring->buf);
ring->buf = NULL;
}
@@ -1751,6 +1767,7 @@
goto free_netdev;
}
spin_lock_init(&mac->hw_stats->stats_lock);
+ u64_stats_init(&mac->hw_stats->syncp);
mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
SET_NETDEV_DEV(eth->netdev[id], eth->dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0f19b01..dc8b1cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -318,6 +318,7 @@
u32 *action, u32 *flow_tag)
{
const struct tc_action *a;
+ LIST_HEAD(actions);
if (tc_no_actions(exts))
return -EINVAL;
@@ -325,7 +326,8 @@
*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
*action = 0;
- tc_for_each_action(a, exts) {
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
/* Only support a single action per rule */
if (*action)
return -EINVAL;
@@ -362,13 +364,15 @@
u32 *action, u32 *dest_vport)
{
const struct tc_action *a;
+ LIST_HEAD(actions);
if (tc_no_actions(exts))
return -EINVAL;
*action = 0;
- tc_for_each_action(a, exts) {
+ tcf_exts_to_list(exts, &actions);
+ list_for_each_entry(a, &actions, list) {
/* Only support a single action per rule */
if (*action)
return -EINVAL;
@@ -503,6 +507,7 @@
struct mlx5e_tc_flow *flow;
struct tc_action *a;
struct mlx5_fc *counter;
+ LIST_HEAD(actions);
u64 bytes;
u64 packets;
u64 lastuse;
@@ -518,7 +523,8 @@
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
- tc_for_each_action(a, f->exts)
+ tcf_exts_to_list(f->exts, &actions);
+ list_for_each_entry(a, &actions, list)
tcf_action_stats_update(a, bytes, packets, lastuse);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 7ca9201..1721098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3383,6 +3383,15 @@
*/
MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
/* reg_ritr_virtual_router
* Virtual router ID associated with the router interface.
* Access: RW
@@ -3484,6 +3493,7 @@
mlxsw_reg_ritr_op_set(payload, op);
mlxsw_reg_ritr_rif_set(payload, rif);
mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+ mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_mtu_set(payload, mtu);
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
}
@@ -4000,6 +4010,7 @@
{
MLXSW_REG_ZERO(ralue, payload);
mlxsw_reg_ralue_protocol_set(payload, protocol);
+ mlxsw_reg_ralue_op_set(payload, op);
mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
mlxsw_reg_ralue_entry_type_set(payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c3e6150..1f81689 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -942,8 +942,8 @@
kfree(mlxsw_sp_vport);
}
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
- u16 vid)
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -956,16 +956,12 @@
if (!vid)
return 0;
- if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
- netdev_warn(dev, "VID=%d already configured\n", vid);
+ if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid))
return 0;
- }
mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid);
- if (!mlxsw_sp_vport) {
- netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+ if (!mlxsw_sp_vport)
return -ENOMEM;
- }
/* When adding the first VLAN interface on a bridged port we need to
* transition all the active 802.1Q bridge VLANs to use explicit
@@ -973,24 +969,17 @@
*/
if (list_is_singular(&mlxsw_sp_port->vports_list)) {
err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
- if (err) {
- netdev_err(dev, "Failed to set to Virtual mode\n");
+ if (err)
goto err_port_vp_mode_trans;
- }
}
err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
- if (err) {
- netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
+ if (err)
goto err_port_vid_learning_set;
- }
err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
- if (err) {
- netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
- vid);
+ if (err)
goto err_port_add_vid;
- }
return 0;
@@ -1010,7 +999,6 @@
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct mlxsw_sp_port *mlxsw_sp_vport;
struct mlxsw_sp_fid *f;
- int err;
/* VLAN 0 is removed from HW filter when device goes down, but
* it is reserved in our case, so simply return.
@@ -1019,23 +1007,12 @@
return 0;
mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
- if (!mlxsw_sp_vport) {
- netdev_warn(dev, "VID=%d does not exist\n", vid);
+ if (WARN_ON(!mlxsw_sp_vport))
return 0;
- }
- err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
- if (err) {
- netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
- vid);
- return err;
- }
+ mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
- err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
- if (err) {
- netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
- return err;
- }
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
/* Drop FID reference. If this was the last reference the
* resources will be freed.
@@ -1048,13 +1025,8 @@
* transition all active 802.1Q bridge VLANs to use VID to FID
* mappings and set port's mode to VLAN mode.
*/
- if (list_is_singular(&mlxsw_sp_port->vports_list)) {
- err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
- if (err) {
- netdev_err(dev, "Failed to set to VLAN mode\n");
- return err;
- }
- }
+ if (list_is_singular(&mlxsw_sp_port->vports_list))
+ mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
@@ -1149,6 +1121,7 @@
bool ingress)
{
const struct tc_action *a;
+ LIST_HEAD(actions);
int err;
if (!tc_single_action(cls->exts)) {
@@ -1156,7 +1129,8 @@
return -ENOTSUPP;
}
- tc_for_each_action(a, cls->exts) {
+ tcf_exts_to_list(cls->exts, &actions);
+ list_for_each_entry(a, &actions, list) {
if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
return -ENOTSUPP;
@@ -2076,6 +2050,18 @@
return 0;
}
+static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->pvid = 1;
+
+ return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
+static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
bool split, u8 module, u8 width, u8 lane)
{
@@ -2191,7 +2177,15 @@
goto err_port_dcb_init;
}
+ err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_pvid_vport_create;
+ }
+
mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+ mlxsw_sp->ports[local_port] = mlxsw_sp_port;
err = register_netdev(dev);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -2208,24 +2202,23 @@
goto err_core_port_init;
}
- err = mlxsw_sp_port_vlan_init(mlxsw_sp_port);
- if (err)
- goto err_port_vlan_init;
-
- mlxsw_sp->ports[local_port] = mlxsw_sp_port;
return 0;
-err_port_vlan_init:
- mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
err_core_port_init:
unregister_netdev(dev);
err_register_netdev:
+ mlxsw_sp->ports[local_port] = NULL;
+ mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+ mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+err_port_pvid_vport_create:
+ mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
err_port_dcb_init:
err_port_ets_init:
err_port_buffers_init:
err_port_admin_status_set:
err_port_mtu_set:
err_port_speed_by_width_set:
+ mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
err_port_swid_set:
err_port_system_port_mapping_set:
err_dev_addr_init:
@@ -2245,12 +2238,12 @@
if (!mlxsw_sp_port)
return;
- mlxsw_sp->ports[local_port] = NULL;
mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
- mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
- mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+ mlxsw_sp->ports[local_port] = NULL;
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+ mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+ mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
free_percpu(mlxsw_sp_port->pcpu_stats);
@@ -2662,6 +2655,26 @@
{
.func = mlxsw_sp_rx_listener_func,
.local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = MLXSW_TRAP_ID_MTUERROR,
+ },
+ {
+ .func = mlxsw_sp_rx_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = MLXSW_TRAP_ID_TTLERROR,
+ },
+ {
+ .func = mlxsw_sp_rx_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = MLXSW_TRAP_ID_LBERROR,
+ },
+ {
+ .func = mlxsw_sp_rx_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = MLXSW_TRAP_ID_OSPF,
+ },
+ {
+ .func = mlxsw_sp_rx_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
.trap_id = MLXSW_TRAP_ID_IP2ME,
},
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f69aa37..ab3feb8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -536,8 +536,6 @@
u16 vid);
int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
u16 vid_end, bool is_member, bool untagged);
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
- u16 vid);
int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
bool set);
void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 074cdda..237418a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -330,7 +330,7 @@
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 01cfb75..b6ed7f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -341,6 +341,8 @@
char pfcc_pl[MLXSW_REG_PFCC_LEN];
mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+ mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
@@ -351,17 +353,17 @@
struct ieee_pfc *pfc)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
int err;
- if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
- pfc->pfc_en) {
+ if (pause_en && pfc->pfc_en) {
netdev_err(dev, "PAUSE frames already enabled on port\n");
return -EINVAL;
}
err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
mlxsw_sp_port->dcb.ets->prio_tc,
- false, pfc);
+ pause_en, pfc);
if (err) {
netdev_err(dev, "Failed to configure port's headroom for PFC\n");
return err;
@@ -380,7 +382,7 @@
err_port_pfc_set:
__mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
- mlxsw_sp_port->dcb.ets->prio_tc, false,
+ mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
mlxsw_sp_port->dcb.pfc);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 81418d6..90bb93b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1651,9 +1651,10 @@
const struct mlxsw_sp_router_fib4_add_info *info = data;
struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
+ struct mlxsw_sp_vr *vr = fib_entry->vr;
mlxsw_sp_fib_entry_destroy(fib_entry);
- mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
kfree(info);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a1ad5e6..d1b59cd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -450,6 +450,8 @@
kfree(f);
+ mlxsw_sp_fid_map(mlxsw_sp, fid, false);
+
mlxsw_sp_fid_op(mlxsw_sp, fid, false);
}
@@ -997,13 +999,13 @@
}
static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
- u16 vid_begin, u16 vid_end, bool init)
+ u16 vid_begin, u16 vid_end)
{
struct net_device *dev = mlxsw_sp_port->dev;
u16 vid, pvid;
int err;
- if (!init && !mlxsw_sp_port->bridged)
+ if (!mlxsw_sp_port->bridged)
return -EINVAL;
err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
@@ -1014,9 +1016,6 @@
return err;
}
- if (init)
- goto out;
-
pvid = mlxsw_sp_port->pvid;
if (pvid >= vid_begin && pvid <= vid_end) {
err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
@@ -1028,7 +1027,6 @@
mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
-out:
/* Changing activity bits only if HW operation succeded */
for (vid = vid_begin; vid <= vid_end; vid++)
clear_bit(vid, mlxsw_sp_port->active_vlans);
@@ -1039,8 +1037,8 @@
static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
const struct switchdev_obj_port_vlan *vlan)
{
- return __mlxsw_sp_port_vlans_del(mlxsw_sp_port,
- vlan->vid_begin, vlan->vid_end, false);
+ return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin,
+ vlan->vid_end);
}
void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -1048,7 +1046,7 @@
u16 vid;
for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
- __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+ __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid);
}
static int
@@ -1546,32 +1544,6 @@
mlxsw_sp_fdb_fini(mlxsw_sp);
}
-int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
- struct net_device *dev = mlxsw_sp_port->dev;
- int err;
-
- /* Allow only untagged packets to ingress and tag them internally
- * with VID 1.
- */
- mlxsw_sp_port->pvid = 1;
- err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1,
- true);
- if (err) {
- netdev_err(dev, "Unable to init VLANs\n");
- return err;
- }
-
- /* Add implicit VLAN interface in the device, so that untagged
- * packets will be classified to the default vFID.
- */
- err = mlxsw_sp_port_add_vid(dev, 0, 1);
- if (err)
- netdev_err(dev, "Failed to configure default vFID\n");
-
- return err;
-}
-
void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 470d769..ed8e301 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -56,6 +56,10 @@
MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
MLXSW_TRAP_ID_ARPBC = 0x50,
MLXSW_TRAP_ID_ARPUC = 0x51,
+ MLXSW_TRAP_ID_MTUERROR = 0x52,
+ MLXSW_TRAP_ID_TTLERROR = 0x53,
+ MLXSW_TRAP_ID_LBERROR = 0x54,
+ MLXSW_TRAP_ID_OSPF = 0x55,
MLXSW_TRAP_ID_IP2ME = 0x5F,
MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index d0dc28f..226cb08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -52,40 +52,94 @@
DCBX_APP_SF_ETHTYPE);
}
+static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
+{
+ u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+ /* Old MFW */
+ if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+ return qed_dcbx_app_ethtype(app_info_bitmap);
+
+ return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE);
+}
+
static bool qed_dcbx_app_port(u32 app_info_bitmap)
{
return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
DCBX_APP_SF_PORT);
}
-static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
{
- return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
- proto_id == QED_ETH_TYPE_DEFAULT);
+ u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+ /* Old MFW */
+ if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+ return qed_dcbx_app_port(app_info_bitmap);
+
+ return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT);
}
-static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
{
- return !!(qed_dcbx_app_port(app_info_bitmap) &&
- proto_id == QED_TCP_PORT_ISCSI);
+ bool ethtype;
+
+ if (ieee)
+ ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+ else
+ ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+ return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
}
-static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
{
- return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
- proto_id == QED_ETH_TYPE_FCOE);
+ bool port;
+
+ if (ieee)
+ port = qed_dcbx_ieee_app_port(app_info_bitmap,
+ DCBX_APP_SF_IEEE_TCP_PORT);
+ else
+ port = qed_dcbx_app_port(app_info_bitmap);
+
+ return !!(port && (proto_id == QED_TCP_PORT_ISCSI));
}
-static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
{
- return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
- proto_id == QED_ETH_TYPE_ROCE);
+ bool ethtype;
+
+ if (ieee)
+ ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+ else
+ ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+ return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE));
}
-static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
{
- return !!(qed_dcbx_app_port(app_info_bitmap) &&
- proto_id == QED_UDP_PORT_TYPE_ROCE_V2);
+ bool ethtype;
+
+ if (ieee)
+ ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+ else
+ ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+ return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
+}
+
+static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
+{
+ bool port;
+
+ if (ieee)
+ port = qed_dcbx_ieee_app_port(app_info_bitmap,
+ DCBX_APP_SF_IEEE_UDP_PORT);
+ else
+ port = qed_dcbx_app_port(app_info_bitmap);
+
+ return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2));
}
static void
@@ -164,17 +218,17 @@
static bool
qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
u32 app_prio_bitmap,
- u16 id, enum dcbx_protocol_type *type)
+ u16 id, enum dcbx_protocol_type *type, bool ieee)
{
- if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) {
+ if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) {
*type = DCBX_PROTOCOL_FCOE;
- } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) {
+ } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
*type = DCBX_PROTOCOL_ROCE;
- } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
+ } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) {
*type = DCBX_PROTOCOL_ISCSI;
- } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) {
+ } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
*type = DCBX_PROTOCOL_ETH;
- } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
+ } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) {
*type = DCBX_PROTOCOL_ROCE_V2;
} else {
*type = DCBX_MAX_PROTOCOL_TYPE;
@@ -194,17 +248,18 @@
qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
struct qed_dcbx_results *p_data,
struct dcbx_app_priority_entry *p_tbl,
- u32 pri_tc_tbl, int count, bool dcbx_enabled)
+ u32 pri_tc_tbl, int count, u8 dcbx_version)
{
u8 tc, priority_map;
enum dcbx_protocol_type type;
+ bool enable, ieee;
u16 protocol_id;
int priority;
- bool enable;
int i;
DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
+ ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
/* Parse APP TLV */
for (i = 0; i < count; i++) {
protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -219,7 +274,7 @@
tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority);
if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
- protocol_id, &type)) {
+ protocol_id, &type, ieee)) {
/* ETH always have the enable bit reset, as it gets
* vlan information per packet. For other protocols,
* should be set according to the dcbx_enabled
@@ -275,15 +330,12 @@
struct dcbx_ets_feature *p_ets;
struct qed_hw_info *p_info;
u32 pri_tc_tbl, flags;
- bool dcbx_enabled;
+ u8 dcbx_version;
int num_entries;
int rc = 0;
- /* If DCBx version is non zero, then negotiation was
- * successfuly performed
- */
flags = p_hwfn->p_dcbx_info->operational.flags;
- dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+ dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
p_app = &p_hwfn->p_dcbx_info->operational.features.app;
p_tbl = p_app->app_pri_tbl;
@@ -295,13 +347,13 @@
num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
- num_entries, dcbx_enabled);
+ num_entries, dcbx_version);
if (rc)
return rc;
p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
data.pf_id = p_hwfn->rel_pf_id;
- data.dcbx_enabled = dcbx_enabled;
+ data.dcbx_enabled = !!dcbx_version;
qed_dcbx_dp_protocol(p_hwfn, &data);
@@ -400,7 +452,7 @@
qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
struct dcbx_app_priority_feature *p_app,
struct dcbx_app_priority_entry *p_tbl,
- struct qed_dcbx_params *p_params)
+ struct qed_dcbx_params *p_params, bool ieee)
{
struct qed_app_entry *entry;
u8 pri_map;
@@ -414,15 +466,46 @@
DCBX_APP_NUM_ENTRIES);
for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
entry = &p_params->app_entry[i];
- entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
- DCBX_APP_SF));
+ if (ieee) {
+ u8 sf_ieee;
+ u32 val;
+
+ sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF_IEEE);
+ switch (sf_ieee) {
+ case DCBX_APP_SF_IEEE_RESERVED:
+ /* Old MFW */
+ val = QED_MFW_GET_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF);
+ entry->sf_ieee = val ?
+ QED_DCBX_SF_IEEE_TCP_UDP_PORT :
+ QED_DCBX_SF_IEEE_ETHTYPE;
+ break;
+ case DCBX_APP_SF_IEEE_ETHTYPE:
+ entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE;
+ break;
+ case DCBX_APP_SF_IEEE_TCP_PORT:
+ entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT;
+ break;
+ case DCBX_APP_SF_IEEE_UDP_PORT:
+ entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT;
+ break;
+ case DCBX_APP_SF_IEEE_TCP_UDP_PORT:
+ entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT;
+ break;
+ }
+ } else {
+ entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
+ DCBX_APP_SF));
+ }
+
pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
entry->prio = ffs(pri_map) - 1;
entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
DCBX_APP_PROTOCOL_ID);
qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
entry->proto_id,
- &entry->proto_type);
+ &entry->proto_type, ieee);
}
DP_VERBOSE(p_hwfn, QED_MSG_DCB,
@@ -483,7 +566,7 @@
bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]);
tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]);
tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]);
- pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]);
+ pri_map = p_ets->pri_tc_tbl[0];
for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) {
p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i];
p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i];
@@ -500,9 +583,9 @@
struct dcbx_app_priority_feature *p_app,
struct dcbx_app_priority_entry *p_tbl,
struct dcbx_ets_feature *p_ets,
- u32 pfc, struct qed_dcbx_params *p_params)
+ u32 pfc, struct qed_dcbx_params *p_params, bool ieee)
{
- qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params);
+ qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee);
qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params);
qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params);
}
@@ -516,7 +599,7 @@
p_feat = &p_hwfn->p_dcbx_info->local_admin.features;
qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
p_feat->app.app_pri_tbl, &p_feat->ets,
- p_feat->pfc, ¶ms->local.params);
+ p_feat->pfc, ¶ms->local.params, false);
params->local.valid = true;
}
@@ -529,7 +612,7 @@
p_feat = &p_hwfn->p_dcbx_info->remote.features;
qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
p_feat->app.app_pri_tbl, &p_feat->ets,
- p_feat->pfc, ¶ms->remote.params);
+ p_feat->pfc, ¶ms->remote.params, false);
params->remote.valid = true;
}
@@ -574,7 +657,8 @@
qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
p_feat->app.app_pri_tbl, &p_feat->ets,
- p_feat->pfc, ¶ms->operational.params);
+ p_feat->pfc, ¶ms->operational.params,
+ p_operational->ieee);
qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results);
err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
p_operational->err = err;
@@ -944,7 +1028,6 @@
val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4));
p_ets->pri_tc_tbl[0] |= val;
}
- p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]);
for (i = 0; i < 2; i++) {
p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]);
p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]);
@@ -954,7 +1037,7 @@
static void
qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
struct dcbx_app_priority_feature *p_app,
- struct qed_dcbx_params *p_params)
+ struct qed_dcbx_params *p_params, bool ieee)
{
u32 *entry;
int i;
@@ -975,12 +1058,36 @@
for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
entry = &p_app->app_pri_tbl[i].entry;
- *entry &= ~DCBX_APP_SF_MASK;
- if (p_params->app_entry[i].ethtype)
- *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
- DCBX_APP_SF_SHIFT);
- else
- *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT);
+ if (ieee) {
+ *entry &= ~DCBX_APP_SF_IEEE_MASK;
+ switch (p_params->app_entry[i].sf_ieee) {
+ case QED_DCBX_SF_IEEE_ETHTYPE:
+ *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
+ DCBX_APP_SF_IEEE_SHIFT);
+ break;
+ case QED_DCBX_SF_IEEE_TCP_PORT:
+ *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
+ DCBX_APP_SF_IEEE_SHIFT);
+ break;
+ case QED_DCBX_SF_IEEE_UDP_PORT:
+ *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
+ DCBX_APP_SF_IEEE_SHIFT);
+ break;
+ case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
+ *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
+ DCBX_APP_SF_IEEE_SHIFT);
+ break;
+ }
+ } else {
+ *entry &= ~DCBX_APP_SF_MASK;
+ if (p_params->app_entry[i].ethtype)
+ *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+ DCBX_APP_SF_SHIFT);
+ else
+ *entry |= ((u32)DCBX_APP_SF_PORT <<
+ DCBX_APP_SF_SHIFT);
+ }
+
*entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
*entry |= ((u32)p_params->app_entry[i].proto_id <<
DCBX_APP_PROTOCOL_ID_SHIFT);
@@ -995,15 +1102,19 @@
struct dcbx_local_params *local_admin,
struct qed_dcbx_set *params)
{
+ bool ieee = false;
+
local_admin->flags = 0;
memcpy(&local_admin->features,
&p_hwfn->p_dcbx_info->operational.features,
sizeof(local_admin->features));
- if (params->enabled)
+ if (params->enabled) {
local_admin->config = params->ver_num;
- else
+ ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE);
+ } else {
local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
+ }
if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG)
qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
@@ -1015,7 +1126,7 @@
if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG)
qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
- ¶ms->config.params);
+ ¶ms->config.params, ieee);
}
int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
@@ -1596,8 +1707,10 @@
if ((entry->ethtype == ethtype) && (entry->proto_id == idval))
break;
/* First empty slot */
- if (!entry->proto_id)
+ if (!entry->proto_id) {
+ dcbx_set.config.params.num_app_entries++;
break;
+ }
}
if (i == QED_DCBX_MAX_APP_PROTOCOL) {
@@ -2117,8 +2230,10 @@
(entry->proto_id == app->protocol))
break;
/* First empty slot */
- if (!entry->proto_id)
+ if (!entry->proto_id) {
+ dcbx_set.config.params.num_app_entries++;
break;
+ }
}
if (i == QED_DCBX_MAX_APP_PROTOCOL) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 5927840..6f9d3b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -6850,6 +6850,14 @@
#define DCBX_APP_SF_SHIFT 8
#define DCBX_APP_SF_ETHTYPE 0
#define DCBX_APP_SF_PORT 1
+#define DCBX_APP_SF_IEEE_MASK 0x0000f000
+#define DCBX_APP_SF_IEEE_SHIFT 12
+#define DCBX_APP_SF_IEEE_RESERVED 0
+#define DCBX_APP_SF_IEEE_ETHTYPE 1
+#define DCBX_APP_SF_IEEE_TCP_PORT 2
+#define DCBX_APP_SF_IEEE_UDP_PORT 3
+#define DCBX_APP_SF_IEEE_TCP_UDP_PORT 4
+
#define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000
#define DCBX_APP_PROTOCOL_ID_SHIFT 16
};
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index fd973f4..49bad00 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
#define _QLCNIC_LINUX_MAJOR 5
#define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 64
-#define QLCNIC_LINUX_VERSIONID "5.3.64"
+#define _QLCNIC_LINUX_SUBVERSION 65
+#define QLCNIC_LINUX_VERSIONID "5.3.65"
#define QLCNIC_DRV_IDC_VER 0x01
#define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\
(_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 87c642d..fedd736 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -102,7 +102,6 @@
#define QLCNIC_RESPONSE_DESC 0x05
#define QLCNIC_LRO_DESC 0x12
-#define QLCNIC_TX_POLL_BUDGET 128
#define QLCNIC_TCP_HDR_SIZE 20
#define QLCNIC_TCP_TS_OPTION_SIZE 12
#define QLCNIC_FETCH_RING_ID(handle) ((handle) >> 63)
@@ -2008,7 +2007,6 @@
struct qlcnic_host_tx_ring *tx_ring;
struct qlcnic_adapter *adapter;
- budget = QLCNIC_TX_POLL_BUDGET;
tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
adapter = tx_ring->adapter;
work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 017d8c2c..24061b9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -156,10 +156,8 @@
spinlock_t vlan_list_lock; /* Lock for VLAN list */
};
-struct qlcnic_async_work_list {
+struct qlcnic_async_cmd {
struct list_head list;
- struct work_struct work;
- void *ptr;
struct qlcnic_cmd_args *cmd;
};
@@ -168,7 +166,10 @@
struct workqueue_struct *bc_trans_wq;
struct workqueue_struct *bc_async_wq;
struct workqueue_struct *bc_flr_wq;
- struct list_head async_list;
+ struct qlcnic_adapter *adapter;
+ struct list_head async_cmd_list;
+ struct work_struct vf_async_work;
+ spinlock_t queue_lock; /* async_cmd_list queue lock */
};
struct qlcnic_sriov {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 7327b72..d710705 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -29,6 +29,7 @@
#define QLC_83XX_VF_RESET_FAIL_THRESH 8
#define QLC_BC_CMD_MAX_RETRY_CNT 5
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work);
static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *);
static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32);
static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *);
@@ -177,7 +178,10 @@
}
bc->bc_async_wq = wq;
- INIT_LIST_HEAD(&bc->async_list);
+ INIT_LIST_HEAD(&bc->async_cmd_list);
+ INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd);
+ spin_lock_init(&bc->queue_lock);
+ bc->adapter = adapter;
for (i = 0; i < num_vfs; i++) {
vf = &sriov->vf_info[i];
@@ -1517,17 +1521,21 @@
void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc)
{
- struct list_head *head = &bc->async_list;
- struct qlcnic_async_work_list *entry;
+ struct list_head *head = &bc->async_cmd_list;
+ struct qlcnic_async_cmd *entry;
flush_workqueue(bc->bc_async_wq);
+ cancel_work_sync(&bc->vf_async_work);
+
+ spin_lock(&bc->queue_lock);
while (!list_empty(head)) {
- entry = list_entry(head->next, struct qlcnic_async_work_list,
+ entry = list_entry(head->next, struct qlcnic_async_cmd,
list);
- cancel_work_sync(&entry->work);
list_del(&entry->list);
+ kfree(entry->cmd);
kfree(entry);
}
+ spin_unlock(&bc->queue_lock);
}
void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
@@ -1587,57 +1595,64 @@
static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
{
- struct qlcnic_async_work_list *entry;
- struct qlcnic_adapter *adapter;
+ struct qlcnic_async_cmd *entry, *tmp;
+ struct qlcnic_back_channel *bc;
struct qlcnic_cmd_args *cmd;
+ struct list_head *head;
+ LIST_HEAD(del_list);
- entry = container_of(work, struct qlcnic_async_work_list, work);
- adapter = entry->ptr;
- cmd = entry->cmd;
- __qlcnic_sriov_issue_cmd(adapter, cmd);
+ bc = container_of(work, struct qlcnic_back_channel, vf_async_work);
+ head = &bc->async_cmd_list;
+
+ spin_lock(&bc->queue_lock);
+ list_splice_init(head, &del_list);
+ spin_unlock(&bc->queue_lock);
+
+ list_for_each_entry_safe(entry, tmp, &del_list, list) {
+ list_del(&entry->list);
+ cmd = entry->cmd;
+ __qlcnic_sriov_issue_cmd(bc->adapter, cmd);
+ kfree(entry);
+ }
+
+ if (!list_empty(head))
+ queue_work(bc->bc_async_wq, &bc->vf_async_work);
+
return;
}
-static struct qlcnic_async_work_list *
-qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc)
+static struct qlcnic_async_cmd *
+qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc,
+ struct qlcnic_cmd_args *cmd)
{
- struct list_head *node;
- struct qlcnic_async_work_list *entry = NULL;
- u8 empty = 0;
+ struct qlcnic_async_cmd *entry = NULL;
- list_for_each(node, &bc->async_list) {
- entry = list_entry(node, struct qlcnic_async_work_list, list);
- if (!work_pending(&entry->work)) {
- empty = 1;
- break;
- }
- }
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return NULL;
- if (!empty) {
- entry = kzalloc(sizeof(struct qlcnic_async_work_list),
- GFP_ATOMIC);
- if (entry == NULL)
- return NULL;
- list_add_tail(&entry->list, &bc->async_list);
- }
+ entry->cmd = cmd;
+
+ spin_lock(&bc->queue_lock);
+ list_add_tail(&entry->list, &bc->async_cmd_list);
+ spin_unlock(&bc->queue_lock);
return entry;
}
static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
- work_func_t func, void *data,
struct qlcnic_cmd_args *cmd)
{
- struct qlcnic_async_work_list *entry = NULL;
+ struct qlcnic_async_cmd *entry = NULL;
- entry = qlcnic_sriov_get_free_node_async_work(bc);
- if (!entry)
+ entry = qlcnic_sriov_alloc_async_cmd(bc, cmd);
+ if (!entry) {
+ qlcnic_free_mbx_args(cmd);
+ kfree(cmd);
return;
+ }
- entry->ptr = data;
- entry->cmd = cmd;
- INIT_WORK(&entry->work, func);
- queue_work(bc->bc_async_wq, &entry->work);
+ queue_work(bc->bc_async_wq, &bc->vf_async_work);
}
static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
@@ -1649,8 +1664,8 @@
if (adapter->need_fw_reset)
return -EIO;
- qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
- adapter, cmd);
+ qlcnic_sriov_schedule_async_cmd(bc, cmd);
+
return 0;
}
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c51f346..f85d605 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -734,6 +734,7 @@
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
ndev->stats.rx_packets++;
+ kmemleak_not_leak(new_skb);
} else {
ndev->stats.rx_dropped++;
new_skb = skb;
@@ -1325,6 +1326,7 @@
kfree_skb(skb);
goto err_cleanup;
}
+ kmemleak_not_leak(skb);
}
/* continue even if we didn't manage to submit all
* receive descs
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 01a7714..8fd1312 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -166,6 +166,7 @@
static void tsi108_timed_checker(unsigned long dev_ptr);
+#ifdef DEBUG
static void dump_eth_one(struct net_device *dev)
{
struct tsi108_prv_data *data = netdev_priv(dev);
@@ -190,6 +191,7 @@
TSI_READ(TSI108_EC_RXESTAT),
TSI_READ(TSI108_EC_RXERR), data->rxpending);
}
+#endif
/* Synchronization is needed between the thread and up/down events.
* Note that the PHY is accessed through the same registers for both
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 467fb8b..591af71 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -644,12 +644,6 @@
u32 event;
};
-struct garp_wrk {
- struct work_struct dwrk;
- struct net_device *netdev;
- struct netvsc_device *netvsc_dev;
-};
-
/* The context of the netvsc device */
struct net_device_context {
/* point back to our device context */
@@ -667,7 +661,6 @@
struct work_struct work;
u32 msg_enable; /* debug level */
- struct garp_wrk gwrk;
struct netvsc_stats __percpu *tx_stats;
struct netvsc_stats __percpu *rx_stats;
@@ -678,6 +671,15 @@
/* the device is going away */
bool start_remove;
+
+ /* State to manage the associated VF interface. */
+ struct net_device *vf_netdev;
+ bool vf_inject;
+ atomic_t vf_use_cnt;
+ /* 1: allocated, serial number is valid. 0: not allocated */
+ u32 vf_alloc;
+ /* Serial number of the VF to team with */
+ u32 vf_serial;
};
/* Per netvsc device */
@@ -733,15 +735,7 @@
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- /* 1: allocated, serial number is valid. 0: not allocated */
- u32 vf_alloc;
- /* Serial number of the VF to team with */
- u32 vf_serial;
atomic_t open_cnt;
- /* State to manage the associated VF interface. */
- bool vf_inject;
- struct net_device *vf_netdev;
- atomic_t vf_use_cnt;
};
static inline struct netvsc_device *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 20e0917..410fb8e8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -77,13 +77,9 @@
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
atomic_set(&net_device->open_cnt, 0);
- atomic_set(&net_device->vf_use_cnt, 0);
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
- net_device->vf_netdev = NULL;
- net_device->vf_inject = false;
-
return net_device;
}
@@ -1106,16 +1102,16 @@
nvscdev->send_table[i] = tab[i];
}
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
struct nvsp_message *nvmsg)
{
- nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
- nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+ net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+ net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
}
static inline void netvsc_receive_inband(struct hv_device *hdev,
- struct netvsc_device *nvdev,
- struct nvsp_message *nvmsg)
+ struct net_device_context *net_device_ctx,
+ struct nvsp_message *nvmsg)
{
switch (nvmsg->hdr.msg_type) {
case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@
break;
case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
- netvsc_send_vf(nvdev, nvmsg);
+ netvsc_send_vf(net_device_ctx, nvmsg);
break;
}
}
@@ -1136,6 +1132,7 @@
struct vmpacket_descriptor *desc)
{
struct nvsp_message *nvmsg;
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
nvmsg = (struct nvsp_message *)((unsigned long)
desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@
break;
case VM_PKT_DATA_INBAND:
- netvsc_receive_inband(device, net_device, nvmsg);
+ netvsc_receive_inband(device, net_device_ctx, nvmsg);
break;
default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 41bd952..3ba29fc 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -658,20 +658,19 @@
struct sk_buff *skb;
struct sk_buff *vf_skb;
struct netvsc_stats *rx_stats;
- struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
u32 bytes_recvd = packet->total_data_buflen;
int ret = 0;
if (!net || net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
- if (READ_ONCE(netvsc_dev->vf_inject)) {
- atomic_inc(&netvsc_dev->vf_use_cnt);
- if (!READ_ONCE(netvsc_dev->vf_inject)) {
+ if (READ_ONCE(net_device_ctx->vf_inject)) {
+ atomic_inc(&net_device_ctx->vf_use_cnt);
+ if (!READ_ONCE(net_device_ctx->vf_inject)) {
/*
* We raced; just move on.
*/
- atomic_dec(&netvsc_dev->vf_use_cnt);
+ atomic_dec(&net_device_ctx->vf_use_cnt);
goto vf_injection_done;
}
@@ -683,17 +682,19 @@
* the host). Deliver these via the VF interface
* in the guest.
*/
- vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
- csum_info, *data, vlan_tci);
+ vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+ packet, csum_info, *data,
+ vlan_tci);
if (vf_skb != NULL) {
- ++netvsc_dev->vf_netdev->stats.rx_packets;
- netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+ ++net_device_ctx->vf_netdev->stats.rx_packets;
+ net_device_ctx->vf_netdev->stats.rx_bytes +=
+ bytes_recvd;
netif_receive_skb(vf_skb);
} else {
++net->stats.rx_dropped;
ret = NVSP_STAT_FAIL;
}
- atomic_dec(&netvsc_dev->vf_use_cnt);
+ atomic_dec(&net_device_ctx->vf_use_cnt);
return ret;
}
@@ -1150,17 +1151,6 @@
free_netdev(netdev);
}
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
- struct garp_wrk *gwrk;
-
- gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
- netdev_notify_peers(gwrk->netdev);
-
- atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
-}
-
static struct net_device *get_netvsc_net_device(char *mac)
{
struct net_device *dev, *found = NULL;
@@ -1203,7 +1193,7 @@
net_device_ctx = netdev_priv(ndev);
netvsc_dev = net_device_ctx->nvdev;
- if (netvsc_dev == NULL)
+ if (!netvsc_dev || net_device_ctx->vf_netdev)
return NOTIFY_DONE;
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1211,10 +1201,23 @@
* Take a reference on the module.
*/
try_module_get(THIS_MODULE);
- netvsc_dev->vf_netdev = vf_netdev;
+ net_device_ctx->vf_netdev = vf_netdev;
return NOTIFY_OK;
}
+static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
+{
+ net_device_ctx->vf_inject = true;
+}
+
+static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
+{
+ net_device_ctx->vf_inject = false;
+
+ /* Wait for currently active users to drain out. */
+ while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
+ udelay(50);
+}
static int netvsc_vf_up(struct net_device *vf_netdev)
{
@@ -1233,11 +1236,11 @@
net_device_ctx = netdev_priv(ndev);
netvsc_dev = net_device_ctx->nvdev;
- if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+ if (!netvsc_dev || !net_device_ctx->vf_netdev)
return NOTIFY_DONE;
netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
- netvsc_dev->vf_inject = true;
+ netvsc_inject_enable(net_device_ctx);
/*
* Open the device before switching data path.
@@ -1252,15 +1255,8 @@
netif_carrier_off(ndev);
- /*
- * Now notify peers. We are scheduling work to
- * notify peers; take a reference to prevent
- * the VF interface from vanishing.
- */
- atomic_inc(&netvsc_dev->vf_use_cnt);
- net_device_ctx->gwrk.netdev = vf_netdev;
- net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
- schedule_work(&net_device_ctx->gwrk.dwrk);
+ /* Now notify peers through VF device. */
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
return NOTIFY_OK;
}
@@ -1283,29 +1279,18 @@
net_device_ctx = netdev_priv(ndev);
netvsc_dev = net_device_ctx->nvdev;
- if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+ if (!netvsc_dev || !net_device_ctx->vf_netdev)
return NOTIFY_DONE;
netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
- netvsc_dev->vf_inject = false;
- /*
- * Wait for currently active users to
- * drain out.
- */
-
- while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
- udelay(50);
+ netvsc_inject_disable(net_device_ctx);
netvsc_switch_datapath(ndev, false);
netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
rndis_filter_close(netvsc_dev);
netif_carrier_on(ndev);
- /*
- * Notify peers.
- */
- atomic_inc(&netvsc_dev->vf_use_cnt);
- net_device_ctx->gwrk.netdev = ndev;
- net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
- schedule_work(&net_device_ctx->gwrk.dwrk);
+
+ /* Now notify peers through netvsc device. */
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
return NOTIFY_OK;
}
@@ -1327,11 +1312,11 @@
net_device_ctx = netdev_priv(ndev);
netvsc_dev = net_device_ctx->nvdev;
- if (netvsc_dev == NULL)
+ if (!netvsc_dev || !net_device_ctx->vf_netdev)
return NOTIFY_DONE;
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-
- netvsc_dev->vf_netdev = NULL;
+ netvsc_inject_disable(net_device_ctx);
+ net_device_ctx->vf_netdev = NULL;
module_put(THIS_MODULE);
return NOTIFY_OK;
}
@@ -1377,11 +1362,14 @@
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
- INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+ atomic_set(&net_device_ctx->vf_use_cnt, 0);
+ net_device_ctx->vf_netdev = NULL;
+ net_device_ctx->vf_inject = false;
+
net->netdev_ops = &device_ops;
net->hw_features = NETVSC_HW_FEATURES;
@@ -1494,8 +1482,13 @@
{
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
- /* Avoid Vlan, Bonding dev with same MAC registering as VF */
- if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING))
+ /* Avoid Vlan dev with same MAC registering as VF */
+ if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+ return NOTIFY_DONE;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+ if (event_dev->priv_flags & IFF_BONDING &&
+ event_dev->flags & IFF_MASTER)
return NOTIFY_DONE;
switch (event) {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index d13e6e1..351e701 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -270,6 +270,7 @@
struct pcpu_secy_stats __percpu *stats;
struct list_head secys;
struct gro_cells gro_cells;
+ unsigned int nest_level;
};
/**
@@ -2699,6 +2700,8 @@
#define MACSEC_FEATURES \
(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+static struct lock_class_key macsec_netdev_addr_lock_key;
+
static int macsec_dev_init(struct net_device *dev)
{
struct macsec_dev *macsec = macsec_priv(dev);
@@ -2910,6 +2913,13 @@
return macsec_priv(dev)->real_dev->ifindex;
}
+
+static int macsec_get_nest_level(struct net_device *dev)
+{
+ return macsec_priv(dev)->nest_level;
+}
+
+
static const struct net_device_ops macsec_netdev_ops = {
.ndo_init = macsec_dev_init,
.ndo_uninit = macsec_dev_uninit,
@@ -2923,6 +2933,7 @@
.ndo_start_xmit = macsec_start_xmit,
.ndo_get_stats64 = macsec_get_stats64,
.ndo_get_iflink = macsec_get_iflink,
+ .ndo_get_lock_subclass = macsec_get_nest_level,
};
static const struct device_type macsec_type = {
@@ -3047,22 +3058,31 @@
}
}
+static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct macsec_dev *macsec = macsec_priv(dev);
+ struct net_device *real_dev = macsec->real_dev;
+
+ unregister_netdevice_queue(dev, head);
+ list_del_rcu(&macsec->secys);
+ macsec_del_dev(macsec);
+ netdev_upper_dev_unlink(real_dev, dev);
+
+ macsec_generation++;
+}
+
static void macsec_dellink(struct net_device *dev, struct list_head *head)
{
struct macsec_dev *macsec = macsec_priv(dev);
struct net_device *real_dev = macsec->real_dev;
struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
- macsec_generation++;
+ macsec_common_dellink(dev, head);
- unregister_netdevice_queue(dev, head);
- list_del_rcu(&macsec->secys);
if (list_empty(&rxd->secys)) {
netdev_rx_handler_unregister(real_dev);
kfree(rxd);
}
-
- macsec_del_dev(macsec);
}
static int register_macsec_dev(struct net_device *real_dev,
@@ -3181,6 +3201,16 @@
dev_hold(real_dev);
+ macsec->nest_level = dev_get_nest_level(real_dev) + 1;
+ netdev_lockdep_set_classes(dev);
+ lockdep_set_class_and_subclass(&dev->addr_list_lock,
+ &macsec_netdev_addr_lock_key,
+ macsec_get_nest_level(dev));
+
+ err = netdev_upper_dev_link(real_dev, dev);
+ if (err < 0)
+ goto unregister;
+
/* need to be already registered so that ->init has run and
* the MAC addr is set
*/
@@ -3193,12 +3223,12 @@
if (rx_handler && sci_exists(real_dev, sci)) {
err = -EBUSY;
- goto unregister;
+ goto unlink;
}
err = macsec_add_dev(dev, sci, icv_len);
if (err)
- goto unregister;
+ goto unlink;
if (data)
macsec_changelink_common(dev, data);
@@ -3213,6 +3243,8 @@
del_dev:
macsec_del_dev(macsec);
+unlink:
+ netdev_upper_dev_unlink(real_dev, dev);
unregister:
unregister_netdevice(dev);
return err;
@@ -3382,8 +3414,12 @@
rxd = macsec_data_rtnl(real_dev);
list_for_each_entry_safe(m, n, &rxd->secys, secys) {
- macsec_dellink(m->secy.netdev, &head);
+ macsec_common_dellink(m->secy.netdev, &head);
}
+
+ netdev_rx_handler_unregister(real_dev);
+ kfree(rxd);
+
unregister_netdevice_many(&head);
break;
}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cd9b538..3234fcd 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1315,7 +1315,7 @@
vlan->dev = dev;
vlan->port = port;
vlan->set_features = MACVLAN_FEATURES;
- vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
+ vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
vlan->mode = MACVLAN_MODE_VEPA;
if (data && data[IFLA_MACVLAN_MODE])
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a38c0da..070e329 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -275,7 +275,6 @@
rtnl_unlock();
synchronize_rcu();
- skb_array_cleanup(&q->skb_array);
sock_put(&q->sk);
}
@@ -533,10 +532,8 @@
static void macvtap_sock_destruct(struct sock *sk)
{
struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
- struct sk_buff *skb;
- while ((skb = skb_array_consume(&q->skb_array)) != NULL)
- kfree_skb(skb);
+ skb_array_cleanup(&q->skb_array);
}
static int macvtap_open(struct inode *inode, struct file *file)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1882d98..053e879 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -677,17 +677,28 @@
data[i] = kszphy_get_stat(phydev, i);
}
+static int kszphy_suspend(struct phy_device *phydev)
+{
+ /* Disable PHY Interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_DISABLED;
+ if (phydev->drv->config_intr)
+ phydev->drv->config_intr(phydev);
+ }
+
+ return genphy_suspend(phydev);
+}
+
static int kszphy_resume(struct phy_device *phydev)
{
- int value;
+ genphy_resume(phydev);
- mutex_lock(&phydev->lock);
-
- value = phy_read(phydev, MII_BMCR);
- phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
-
- kszphy_config_intr(phydev);
- mutex_unlock(&phydev->lock);
+ /* Enable PHY Interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_ENABLED;
+ if (phydev->drv->config_intr)
+ phydev->drv->config_intr(phydev);
+ }
return 0;
}
@@ -900,7 +911,7 @@
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
- .suspend = genphy_suspend,
+ .suspend = kszphy_suspend,
.resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8061,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index da4e3d6..c0dda6f 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1811,7 +1811,7 @@
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = daddr;
- fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+ fl4.saddr = *saddr;
rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt)) {
@@ -1847,7 +1847,7 @@
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.daddr = *daddr;
- fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+ fl6.saddr = *saddr;
fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
@@ -1920,7 +1920,8 @@
struct rtable *rt = NULL;
const struct iphdr *old_iph;
union vxlan_addr *dst;
- union vxlan_addr remote_ip;
+ union vxlan_addr remote_ip, local_ip;
+ union vxlan_addr *src;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
@@ -1938,6 +1939,7 @@
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
+ src = &vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
} else {
if (!info) {
@@ -1948,11 +1950,15 @@
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = vxlan_tun_id_to_vni(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
- if (remote_ip.sa.sa_family == AF_INET)
+ if (remote_ip.sa.sa_family == AF_INET) {
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
- else
+ local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+ } else {
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+ local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+ }
dst = &remote_ip;
+ src = &local_ip;
dst_cache = &info->dst_cache;
}
@@ -1992,15 +1998,14 @@
}
if (dst->sa.sa_family == AF_INET) {
- __be32 saddr;
-
if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;
rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
- dst->sin.sin_addr.s_addr, &saddr,
+ dst->sin.sin_addr.s_addr,
+ &src->sin.sin_addr.s_addr,
dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
@@ -2017,7 +2022,7 @@
}
/* Bypass encapsulation if the destination is local */
- if (rt->rt_flags & RTCF_LOCAL &&
+ if (!info && rt->rt_flags & RTCF_LOCAL &&
!(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
@@ -2043,13 +2048,12 @@
if (err < 0)
goto xmit_tx_error;
- udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
- struct in6_addr saddr;
u32 rt6i_flags;
if (!vxlan->vn6_sock)
@@ -2058,7 +2062,8 @@
ndst = vxlan6_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
- label, &dst->sin6.sin6_addr, &saddr,
+ label, &dst->sin6.sin6_addr,
+ &src->sin6.sin6_addr,
dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
@@ -2077,7 +2082,7 @@
/* Bypass encapsulation if the destination is local */
rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- if (rt6i_flags & RTF_LOCAL &&
+ if (!info && rt6i_flags & RTF_LOCAL &&
!(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
struct vxlan_dev *dst_vxlan;
@@ -2104,7 +2109,8 @@
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
- &saddr, &dst->sin6.sin6_addr, tos, ttl,
+ &src->sin6.sin6_addr,
+ &dst->sin6.sin6_addr, tos, ttl,
label, src_port, dst_port, !udp_sum);
#endif
}
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 1d68916..9e1f2d9 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5700,10 +5700,11 @@
mutex_unlock(&wl->mutex);
}
-static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta)
+static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta)
{
struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv;
- struct wl1271 *wl = wl_sta->wl;
+ struct wl1271 *wl = hw->priv;
u8 hlid = wl_sta->hlid;
/* return in units of Kbps */
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 88e9166..368795a 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1269,6 +1269,7 @@
}
}
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+ btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
revalidate_disk(btt->btt_disk);
return 0;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 3fa7919..97dd292 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -140,10 +140,30 @@
}
static DEVICE_ATTR_RW(namespace);
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+ ssize_t rc;
+
+ device_lock(dev);
+ if (dev->driver)
+ rc = sprintf(buf, "%llu\n", nd_btt->size);
+ else {
+ /* no size to convey if the btt instance is disabled */
+ rc = -ENXIO;
+ }
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(size);
+
static struct attribute *nd_btt_attributes[] = {
&dev_attr_sector_size.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
+ &dev_attr_size.attr,
NULL,
};
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 4047639..8024a0e 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -143,6 +143,7 @@
struct nd_namespace_common *ndns;
struct btt *btt;
unsigned long lbasize;
+ u64 size;
u8 *uuid;
int id;
};
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d7c33f9..8dcf5a9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1543,15 +1543,10 @@
reinit_completion(&dev->ioq_wait);
retry:
timeout = ADMIN_TIMEOUT;
- for (; i > 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
-
- if (!pass)
- nvme_suspend_queue(nvmeq);
- if (nvme_delete_queue(nvmeq, opcode))
+ for (; i > 0; i--, sent++)
+ if (nvme_delete_queue(dev->queues[i], opcode))
break;
- ++sent;
- }
+
while (sent--) {
timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
if (timeout == 0)
@@ -1693,11 +1688,12 @@
nvme_stop_queues(&dev->ctrl);
csts = readl(dev->bar + NVME_REG_CSTS);
}
+
+ for (i = dev->queue_count - 1; i > 0; i--)
+ nvme_suspend_queue(dev->queues[i]);
+
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
- for (i = dev->queue_count - 1; i >= 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
- nvme_suspend_queue(nvmeq);
- }
+ nvme_suspend_queue(dev->queues[0]);
} else {
nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3e3ce2b..8d2875b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,13 +12,11 @@
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
-#include <linux/jiffies.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/types.h>
@@ -26,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
-#include <linux/t10-pi.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
@@ -169,7 +166,6 @@
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
/* XXX: really should move to a generic header sooner or later.. */
static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -687,11 +683,6 @@
list_del(&ctrl->list);
mutex_unlock(&nvme_rdma_ctrl_mutex);
- if (ctrl->ctrl.tagset) {
- blk_cleanup_queue(ctrl->ctrl.connect_q);
- blk_mq_free_tag_set(&ctrl->tag_set);
- nvme_rdma_dev_put(ctrl->device);
- }
kfree(ctrl->queues);
nvmf_free_options(nctrl->opts);
free_ctrl:
@@ -748,8 +739,11 @@
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- if (ctrl->queue_count > 1)
+ if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
+ nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
+ }
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
@@ -1269,7 +1263,7 @@
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
struct rdma_conn_param param = { };
- struct nvme_rdma_cm_req priv;
+ struct nvme_rdma_cm_req priv = { };
int ret;
param.qp_num = queue->qp->qp_num;
@@ -1318,37 +1312,39 @@
* that caught the event. Since we hold the callout until the controller
* deletion is completed, we'll deadlock if the controller deletion will
* call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
+ * of destroying this queue before-hand, destroy the queue resources,
+ * then queue the controller deletion which won't destroy this queue and
+ * we destroy the cm_id implicitly by returning a non-zero rc to the callout.
*/
static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
- int ret, ctrl_deleted = 0;
+ int ret;
- /* First disable the queue so ctrl delete won't free it */
- if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
- goto out;
+ /* Own the controller deletion */
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+ return 0;
- /* delete the controller */
- ret = __nvme_rdma_del_ctrl(ctrl);
- if (!ret) {
- dev_warn(ctrl->ctrl.device,
- "Got rdma device removal event, deleting ctrl\n");
- flush_work(&ctrl->delete_work);
+ dev_warn(ctrl->ctrl.device,
+ "Got rdma device removal event, deleting ctrl\n");
+
+ /* Get rid of reconnect work if it's running */
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+
+ /* Disable the queue so ctrl delete won't free it */
+ if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
+ /* Free this queue ourselves */
+ nvme_rdma_stop_queue(queue);
+ nvme_rdma_destroy_queue_ib(queue);
/* Return non-zero so the cm_id will destroy implicitly */
- ctrl_deleted = 1;
-
- /* Free this queue ourselves */
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
- nvme_rdma_destroy_queue_ib(queue);
+ ret = 1;
}
-out:
- return ctrl_deleted;
+ /* Queue controller deletion */
+ queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ flush_work(&ctrl->delete_work);
+ return ret;
}
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1648,7 +1644,7 @@
nvme_rdma_free_io_queues(ctrl);
}
- if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+ if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1653,27 @@
nvme_rdma_destroy_admin_queue(ctrl);
}
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+ nvme_uninit_ctrl(&ctrl->ctrl);
+ if (shutdown)
+ nvme_rdma_shutdown_ctrl(ctrl);
+
+ if (ctrl->ctrl.tagset) {
+ blk_cleanup_queue(ctrl->ctrl.connect_q);
+ blk_mq_free_tag_set(&ctrl->tag_set);
+ nvme_rdma_dev_put(ctrl->device);
+ }
+
+ nvme_put_ctrl(&ctrl->ctrl);
+}
+
static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_rdma_shutdown_ctrl(ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, true);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1698,9 +1706,7 @@
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_uninit_ctrl(&ctrl->ctrl);
- nvme_put_ctrl(&ctrl->ctrl);
+ __nvme_rdma_remove_ctrl(ctrl, false);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1745,7 @@
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl);
+ nvme_queue_async_events(&ctrl->ctrl);
}
return;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2fac17a..47c564b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,7 +13,6 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
-#include <linux/random.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
@@ -83,7 +82,6 @@
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
- u64 serial;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@
id->vid = 0;
id->ssvid = 0;
- /* generate a random serial number as our controllers are ephemeral: */
- get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn));
- snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+ snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8a891ca..6559d5a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
+#include <linux/random.h>
#include "nvmet.h"
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
+ /* generate a random serial number as our controllers are ephemeral: */
+ get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 94e7829..7affd40 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -414,9 +414,8 @@
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
- nvme_remove_namespaces(&ctrl->ctrl);
- nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
+ nvme_loop_shutdown_ctrl(ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
@@ -501,7 +500,6 @@
nvme_loop_destroy_admin_queue(ctrl);
out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- nvme_remove_namespaces(&ctrl->ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 57dd6d8..76b6eed 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@
struct mutex lock;
u64 cap;
+ u64 serial;
u32 cc;
u32 csts;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504..b4d6485 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@
NVMET_RDMA_Q_CONNECTING,
NVMET_RDMA_Q_LIVE,
NVMET_RDMA_Q_DISCONNECTING,
+ NVMET_RDMA_IN_DEVICE_REMOVAL,
};
struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@
if (!len)
return 0;
- /* use the already allocated data buffer if possible */
- if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
- nvmet_rdma_use_inline_sg(rsp, len, 0);
- } else {
- status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
- len);
- if (status)
- return status;
- }
+ status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+ len);
+ if (status)
+ return status;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -984,7 +980,10 @@
struct nvmet_rdma_device *dev = queue->dev;
nvmet_rdma_free_queue(queue);
- rdma_destroy_id(cm_id);
+
+ if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+ rdma_destroy_id(cm_id);
+
kref_put(&dev->ref, nvmet_rdma_free_dev);
}
@@ -1233,8 +1232,9 @@
switch (queue->state) {
case NVMET_RDMA_Q_CONNECTING:
case NVMET_RDMA_Q_LIVE:
- disconnect = true;
queue->state = NVMET_RDMA_Q_DISCONNECTING;
+ case NVMET_RDMA_IN_DEVICE_REMOVAL:
+ disconnect = true;
break;
case NVMET_RDMA_Q_DISCONNECTING:
break;
@@ -1272,6 +1272,62 @@
schedule_work(&queue->release_work);
}
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma_cm id (for a listener, its context is the nvmet port)
+ * @queue: nvmet rdma queue (cm id qp_context), NULL for a listener cm_id
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership of destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction and destroy
+ * the cm_id implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+ struct nvmet_rdma_queue *queue)
+{
+ unsigned long flags;
+
+ if (!queue) {
+ struct nvmet_port *port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. Use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+ } else {
+ /*
+ * This is a queue cm_id. Make sure that
+ * release queue will not destroy the cm_id
+ * and schedule all ctrl queues removal (only
+ * if the queue is not disconnecting already).
+ */
+ spin_lock_irqsave(&queue->state_lock, flags);
+ if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+ queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&queue->state_lock, flags);
+ nvmet_rdma_queue_disconnect(queue);
+ flush_scheduled_work();
+ }
+
+ /*
+ * We need to return 1 so that the core will destroy
+ * its own ID. What a great API design..
+ */
+ return 1;
+}
+
static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -1294,20 +1350,11 @@
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
- /*
- * We can get the device removal callback even for a
- * CM ID that we aren't actually using. In that case
- * the context pointer is NULL, so we shouldn't try
- * to disconnect a non-existing queue. But we also
- * need to return 1 so that the core will destroy
- * it's own ID. What a great API design..
- */
- if (queue)
- nvmet_rdma_queue_disconnect(queue);
- else
- ret = 1;
+ nvmet_rdma_queue_disconnect(queue);
+ break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1443,10 @@
static void nvmet_rdma_remove_port(struct nvmet_port *port)
{
- struct rdma_cm_id *cm_id = port->priv;
+ struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
- rdma_destroy_id(cm_id);
+ if (cm_id)
+ rdma_destroy_id(cm_id);
}
static struct nvmet_fabrics_ops nvmet_rdma_ops = {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a02981e..eafa613 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1411,6 +1411,8 @@
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);
+ info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
return NULL;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 6ccb994..c494613 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -688,7 +688,7 @@
return 0;
}
-static DEFINE_MUTEX(arm_pmu_mutex);
+static DEFINE_SPINLOCK(arm_pmu_lock);
static LIST_HEAD(arm_pmu_list);
/*
@@ -701,7 +701,7 @@
{
struct arm_pmu *pmu;
- mutex_lock(&arm_pmu_mutex);
+ spin_lock(&arm_pmu_lock);
list_for_each_entry(pmu, &arm_pmu_list, entry) {
if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
@@ -709,7 +709,7 @@
if (pmu->reset)
pmu->reset(pmu);
}
- mutex_unlock(&arm_pmu_mutex);
+ spin_unlock(&arm_pmu_lock);
return 0;
}
@@ -821,9 +821,9 @@
if (!cpu_hw_events)
return -ENOMEM;
- mutex_lock(&arm_pmu_mutex);
+ spin_lock(&arm_pmu_lock);
list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
- mutex_unlock(&arm_pmu_mutex);
+ spin_unlock(&arm_pmu_lock);
err = cpu_pm_pmu_register(cpu_pmu);
if (err)
@@ -859,9 +859,9 @@
return 0;
out_unregister:
- mutex_lock(&arm_pmu_mutex);
+ spin_lock(&arm_pmu_lock);
list_del(&cpu_pmu->entry);
- mutex_unlock(&arm_pmu_mutex);
+ spin_unlock(&arm_pmu_lock);
free_percpu(cpu_hw_events);
return err;
}
@@ -869,9 +869,9 @@
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
cpu_pm_pmu_unregister(cpu_pmu);
- mutex_lock(&arm_pmu_mutex);
+ spin_lock(&arm_pmu_lock);
list_del(&cpu_pmu->entry);
- mutex_unlock(&arm_pmu_mutex);
+ spin_unlock(&arm_pmu_lock);
free_percpu(cpu_pmu->hw_events);
}
@@ -967,11 +967,12 @@
/* If we didn't manage to parse anything, try the interrupt affinity */
if (cpumask_weight(&pmu->supported_cpus) == 0) {
- if (!using_spi) {
- /* If using PPIs, check the affinity of the partition */
- int ret, irq;
+ int irq = platform_get_irq(pdev, 0);
- irq = platform_get_irq(pdev, 0);
+ if (irq_is_percpu(irq)) {
+ /* If using PPIs, check the affinity of the partition */
+ int ret;
+
ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
if (ret) {
kfree(irqs);
diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index eb4990f..7fb7656 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -11,6 +11,7 @@
#include <linux/bitops.h>
#include <linux/err.h>
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pinctrl/pinconf.h>
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index 11623c6b..44e69c9 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -727,13 +727,7 @@
return PTR_ERR(pc->pcdev);
}
- ret = meson_gpiolib_register(pc);
- if (ret) {
- pinctrl_unregister(pc->pcdev);
- return ret;
- }
-
- return 0;
+ return meson_gpiolib_register(pc);
}
static struct platform_driver meson_pinctrl_driver = {
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 634b4d3..b3e7723 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -43,17 +43,6 @@
spin_lock_irqsave(&gpio_dev->lock, flags);
pin_reg = readl(gpio_dev->base + offset * 4);
- /*
- * Suppose BIOS or Bootloader sets specific debounce for the
- * GPIO. if not, set debounce to be 2.75ms and remove glitch.
- */
- if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
- pin_reg |= 0xf;
- pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
- pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
- pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
- }
-
pin_reg &= ~BIT(OUTPUT_ENABLE_OFF);
writel(pin_reg, gpio_dev->base + offset * 4);
spin_unlock_irqrestore(&gpio_dev->lock, flags);
@@ -326,15 +315,6 @@
spin_lock_irqsave(&gpio_dev->lock, flags);
pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
- /*
- Suppose BIOS or Bootloader sets specific debounce for the
- GPIO. if not, set debounce to be 2.75ms.
- */
- if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
- pin_reg |= 0xf;
- pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
- pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
- }
pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
pin_reg |= BIT(INTERRUPT_MASK_OFF);
writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index c6d410e..7bad200 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -1432,7 +1432,6 @@
{
struct pistachio_pinctrl *pctl;
struct resource *res;
- int ret;
pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL);
if (!pctl)
@@ -1464,13 +1463,7 @@
return PTR_ERR(pctl->pctldev);
}
- ret = pistachio_gpio_register(pctl);
- if (ret < 0) {
- pinctrl_unregister(pctl->pctldev);
- return ret;
- }
-
- return 0;
+ return pistachio_gpio_register(pctl);
}
static struct platform_driver pistachio_pinctrl_driver = {
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index d2bc092..da2fe18 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -110,8 +110,8 @@
/* BIOS error detected */
{ KE_IGNORE, 0xe00d, { KEY_RESERVED } },
- /* Unknown, defined in ACPI DSDT */
- /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */
+ /* Battery was removed or inserted */
+ { KE_IGNORE, 0xe00e, { KEY_RESERVED } },
/* Wifi Catcher */
{ KE_KEY, 0xe011, { KEY_PROG2 } },
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 9c65f13..da7a75f 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -457,13 +457,16 @@
}
static inline void max17042_read_model_data(struct max17042_chip *chip,
- u8 addr, u32 *data, int size)
+ u8 addr, u16 *data, int size)
{
struct regmap *map = chip->regmap;
int i;
+ u32 tmp;
- for (i = 0; i < size; i++)
- regmap_read(map, addr + i, &data[i]);
+ for (i = 0; i < size; i++) {
+ regmap_read(map, addr + i, &tmp);
+ data[i] = (u16)tmp;
+ }
}
static inline int max17042_model_data_compare(struct max17042_chip *chip,
@@ -486,7 +489,7 @@
{
int ret;
int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
- u32 *temp_data;
+ u16 *temp_data;
temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
if (!temp_data)
@@ -501,7 +504,7 @@
ret = max17042_model_data_compare(
chip,
chip->pdata->config_data->cell_char_tbl,
- (u16 *)temp_data,
+ temp_data,
table_size);
max10742_lock_model(chip);
@@ -514,7 +517,7 @@
{
int i;
int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
- u32 *temp_data;
+ u16 *temp_data;
int ret = 0;
temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 3bfac53..c74c3f6 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -200,8 +200,8 @@
config SYSCON_REBOOT_MODE
tristate "Generic SYSCON regmap reboot mode driver"
depends on OF
+ depends on MFD_SYSCON
select REBOOT_MODE
- select MFD_SYSCON
help
Say y here will enable reboot mode driver. This will
get reboot mode arguments and store it in SYSCON mapped
diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c
index 9ab7f56..f69387e 100644
--- a/drivers/power/reset/hisi-reboot.c
+++ b/drivers/power/reset/hisi-reboot.c
@@ -53,13 +53,16 @@
if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
pr_err("failed to find reboot-offset property\n");
+ iounmap(base);
return -EINVAL;
}
err = register_restart_handler(&hisi_restart_nb);
- if (err)
+ if (err) {
dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
err);
+ iounmap(base);
+ }
return err;
}
diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index 73dfae4..4c56e54 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -206,6 +206,7 @@
if (!charger)
return -ENOMEM;
+ platform_set_drvdata(pdev, charger);
charger->tps = tps;
charger->dev = &pdev->dev;
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index cecc15a..3fa17ac 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -1080,8 +1080,8 @@
static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
long timeout)
{
- struct rio_channel *ch = NULL;
- struct rio_channel *new_ch = NULL;
+ struct rio_channel *ch;
+ struct rio_channel *new_ch;
struct conn_req *req;
struct cm_peer *peer;
int found = 0;
@@ -1155,6 +1155,7 @@
spin_unlock_bh(&ch->lock);
riocm_put_channel(ch);
+ ch = NULL;
kfree(req);
down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@
if (!found) {
/* If peer device object not found, simply ignore the request */
err = -ENODEV;
- goto err_nodev;
+ goto err_put_new_ch;
}
new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@
*new_ch_id = new_ch->id;
return new_ch;
+
+err_put_new_ch:
+ spin_lock_bh(&idr_lock);
+ idr_remove(&ch_idr, new_ch->id);
+ spin_unlock_bh(&idr_lock);
+ riocm_put_channel(new_ch);
+
err_put:
- riocm_put_channel(ch);
-err_nodev:
- if (new_ch) {
- spin_lock_bh(&idr_lock);
- idr_remove(&ch_idr, new_ch->id);
- spin_unlock_bh(&idr_lock);
- riocm_put_channel(new_ch);
- }
+ if (ch)
+ riocm_put_channel(ch);
*new_ch_id = 0;
return ERR_PTR(err);
}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 8973d34..fb1b56a 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1643,9 +1643,18 @@
u8 *sense = NULL;
int expires;
+ cqr = (struct dasd_ccw_req *) intparm;
if (IS_ERR(irb)) {
switch (PTR_ERR(irb)) {
case -EIO:
+ if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) {
+ device = (struct dasd_device *) cqr->startdev;
+ cqr->status = DASD_CQR_CLEARED;
+ dasd_device_clear_timer(device);
+ wake_up(&dasd_flush_wq);
+ dasd_schedule_device_bh(device);
+ return;
+ }
break;
case -ETIMEDOUT:
DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
@@ -1661,7 +1670,6 @@
}
now = get_tod_clock();
- cqr = (struct dasd_ccw_req *) intparm;
/* check for conditions that should be handled immediately */
if (!cqr ||
!(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index fd2eff4..98bbec4 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5078,6 +5078,8 @@
return PTR_ERR(cqr);
}
+ cqr->lpm = lpum;
+retry:
cqr->startdev = device;
cqr->memdev = device;
cqr->block = NULL;
@@ -5122,6 +5124,14 @@
(prssdp + 1);
memcpy(messages, message_buf,
sizeof(struct dasd_rssd_messages));
+ } else if (cqr->lpm) {
+ /*
+ * on z/VM we might not be able to do I/O on the requested path
+ * but instead we get the required information on any path
+ * so retry with open path mask
+ */
+ cqr->lpm = 0;
+ goto retry;
} else
DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
"Reading messages failed with rc=%d\n"
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 7ada078..6a58bc8 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -762,7 +762,6 @@
priv->state = DEV_STATE_NOT_OPER;
priv->dev_id.devno = sch->schib.pmcw.dev;
priv->dev_id.ssid = sch->schid.ssid;
- priv->schid = sch->schid;
INIT_WORK(&priv->todo_work, ccw_device_todo);
INIT_LIST_HEAD(&priv->cmb_list);
@@ -1000,7 +999,6 @@
put_device(&old_sch->dev);
/* Initialize new subchannel. */
spin_lock_irq(sch->lock);
- cdev->private->schid = sch->schid;
cdev->ccwlock = sch->lock;
if (!sch_is_pseudo_sch(sch))
sch_set_cdev(sch, cdev);
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 15b56a1..9bc3512 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -26,6 +26,7 @@
static void
ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
{
+ struct subchannel *sch = to_subchannel(cdev->dev.parent);
char dbf_text[15];
if (!scsw_is_valid_cstat(&irb->scsw) ||
@@ -36,10 +37,10 @@
"received"
" ... device %04x on subchannel 0.%x.%04x, dev_stat "
": %02X sch_stat : %02X\n",
- cdev->private->dev_id.devno, cdev->private->schid.ssid,
- cdev->private->schid.sch_no,
+ cdev->private->dev_id.devno, sch->schid.ssid,
+ sch->schid.sch_no,
scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw));
- sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no);
+ sprintf(dbf_text, "chk%x", sch->schid.sch_no);
CIO_TRACE_EVENT(0, dbf_text);
CIO_HEX_EVENT(0, irb, sizeof(struct irb));
}
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 8975060..220f491 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -120,7 +120,6 @@
int state; /* device state */
atomic_t onoff;
struct ccw_dev_id dev_id; /* device id */
- struct subchannel_id schid; /* subchannel number */
struct ccw_request req; /* internal I/O request */
int iretry;
u8 pgid_valid_mask; /* mask of valid PGIDs */
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 4bb5262f..71bf9bd 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -686,6 +686,15 @@
q->qdio_error = 0;
}
+static inline int qdio_tasklet_schedule(struct qdio_q *q)
+{
+ if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) {
+ tasklet_schedule(&q->tasklet);
+ return 0;
+ }
+ return -EPERM;
+}
+
static void __qdio_inbound_processing(struct qdio_q *q)
{
qperf_inc(q, tasklet_inbound);
@@ -698,10 +707,8 @@
if (!qdio_inbound_q_done(q)) {
/* means poll time is not yet over */
qperf_inc(q, tasklet_inbound_resched);
- if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
- tasklet_schedule(&q->tasklet);
+ if (!qdio_tasklet_schedule(q))
return;
- }
}
qdio_stop_polling(q);
@@ -711,8 +718,7 @@
*/
if (!qdio_inbound_q_done(q)) {
qperf_inc(q, tasklet_inbound_resched2);
- if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
}
}
@@ -869,16 +875,15 @@
* is noticed and outbound_handler is called after some time.
*/
if (qdio_outbound_q_done(q))
- del_timer(&q->u.out.timer);
+ del_timer_sync(&q->u.out.timer);
else
- if (!timer_pending(&q->u.out.timer))
+ if (!timer_pending(&q->u.out.timer) &&
+ likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
return;
sched:
- if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
- return;
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
}
/* outbound tasklet */
@@ -892,9 +897,7 @@
{
struct qdio_q *q = (struct qdio_q *)data;
- if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
- return;
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
}
static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
@@ -907,7 +910,7 @@
for_each_output_queue(q->irq_ptr, out, i)
if (!qdio_outbound_q_done(out))
- tasklet_schedule(&out->tasklet);
+ qdio_tasklet_schedule(out);
}
static void __tiqdio_inbound_processing(struct qdio_q *q)
@@ -929,10 +932,8 @@
if (!qdio_inbound_q_done(q)) {
qperf_inc(q, tasklet_inbound_resched);
- if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
- tasklet_schedule(&q->tasklet);
+ if (!qdio_tasklet_schedule(q))
return;
- }
}
qdio_stop_polling(q);
@@ -942,8 +943,7 @@
*/
if (!qdio_inbound_q_done(q)) {
qperf_inc(q, tasklet_inbound_resched2);
- if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
}
}
@@ -977,7 +977,7 @@
int i;
struct qdio_q *q;
- if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
+ if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
return;
for_each_input_queue(irq_ptr, q, i) {
@@ -1003,7 +1003,7 @@
continue;
if (need_siga_sync(q) && need_siga_sync_out_after_pci(q))
qdio_siga_sync_q(q);
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
}
}
@@ -1066,10 +1066,12 @@
struct irb *irb)
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+ struct subchannel_id schid;
int cstat, dstat;
if (!intparm || !irq_ptr) {
- DBF_ERROR("qint:%4x", cdev->private->schid.sch_no);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_ERROR("qint:%4x", schid.sch_no);
return;
}
@@ -1122,12 +1124,14 @@
int qdio_get_ssqd_desc(struct ccw_device *cdev,
struct qdio_ssqd_desc *data)
{
+ struct subchannel_id schid;
if (!cdev || !cdev->private)
return -EINVAL;
- DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no);
- return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_EVENT("get ssqd:%4x", schid.sch_no);
+ return qdio_setup_get_ssqd(NULL, &schid, data);
}
EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
@@ -1141,7 +1145,7 @@
tasklet_kill(&q->tasklet);
for_each_output_queue(irq_ptr, q, i) {
- del_timer(&q->u.out.timer);
+ del_timer_sync(&q->u.out.timer);
tasklet_kill(&q->tasklet);
}
}
@@ -1154,14 +1158,15 @@
int qdio_shutdown(struct ccw_device *cdev, int how)
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+ struct subchannel_id schid;
int rc;
- unsigned long flags;
if (!irq_ptr)
return -ENODEV;
WARN_ON_ONCE(irqs_disabled());
- DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_EVENT("qshutdown:%4x", schid.sch_no);
mutex_lock(&irq_ptr->setup_mutex);
/*
@@ -1184,7 +1189,7 @@
qdio_shutdown_debug_entries(irq_ptr);
/* cleanup subchannel */
- spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+ spin_lock_irq(get_ccwdev_lock(cdev));
if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
@@ -1198,12 +1203,12 @@
}
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
- spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+ spin_unlock_irq(get_ccwdev_lock(cdev));
wait_event_interruptible_timeout(cdev->private->wait_q,
irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
irq_ptr->state == QDIO_IRQ_STATE_ERR,
10 * HZ);
- spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+ spin_lock_irq(get_ccwdev_lock(cdev));
no_cleanup:
qdio_shutdown_thinint(irq_ptr);
@@ -1211,7 +1216,7 @@
/* restore interrupt handler */
if ((void *)cdev->handler == (void *)qdio_int_handler)
cdev->handler = irq_ptr->orig_handler;
- spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+ spin_unlock_irq(get_ccwdev_lock(cdev));
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
mutex_unlock(&irq_ptr->setup_mutex);
@@ -1228,11 +1233,13 @@
int qdio_free(struct ccw_device *cdev)
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+ struct subchannel_id schid;
if (!irq_ptr)
return -ENODEV;
- DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_EVENT("qfree:%4x", schid.sch_no);
DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned");
mutex_lock(&irq_ptr->setup_mutex);
@@ -1251,9 +1258,11 @@
*/
int qdio_allocate(struct qdio_initialize *init_data)
{
+ struct subchannel_id schid;
struct qdio_irq *irq_ptr;
- DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no);
+ ccw_device_get_schid(init_data->cdev, &schid);
+ DBF_EVENT("qallocate:%4x", schid.sch_no);
if ((init_data->no_input_qs && !init_data->input_handler) ||
(init_data->no_output_qs && !init_data->output_handler))
@@ -1331,20 +1340,18 @@
*/
int qdio_establish(struct qdio_initialize *init_data)
{
- struct qdio_irq *irq_ptr;
struct ccw_device *cdev = init_data->cdev;
- unsigned long saveflags;
+ struct subchannel_id schid;
+ struct qdio_irq *irq_ptr;
int rc;
- DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_EVENT("qestablish:%4x", schid.sch_no);
irq_ptr = cdev->private->qdio_data;
if (!irq_ptr)
return -ENODEV;
- if (cdev->private->state != DEV_STATE_ONLINE)
- return -EINVAL;
-
mutex_lock(&irq_ptr->setup_mutex);
qdio_setup_irq(init_data);
@@ -1361,17 +1368,14 @@
irq_ptr->ccw.count = irq_ptr->equeue.count;
irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
- spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+ spin_lock_irq(get_ccwdev_lock(cdev));
ccw_device_set_options_mask(cdev, 0);
rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+ spin_unlock_irq(get_ccwdev_lock(cdev));
if (rc) {
DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
DBF_ERROR("rc:%4x", rc);
- }
- spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
- if (rc) {
mutex_unlock(&irq_ptr->setup_mutex);
qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
return rc;
@@ -1407,19 +1411,17 @@
*/
int qdio_activate(struct ccw_device *cdev)
{
+ struct subchannel_id schid;
struct qdio_irq *irq_ptr;
int rc;
- unsigned long saveflags;
- DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no);
+ ccw_device_get_schid(cdev, &schid);
+ DBF_EVENT("qactivate:%4x", schid.sch_no);
irq_ptr = cdev->private->qdio_data;
if (!irq_ptr)
return -ENODEV;
- if (cdev->private->state != DEV_STATE_ONLINE)
- return -EINVAL;
-
mutex_lock(&irq_ptr->setup_mutex);
if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
rc = -EBUSY;
@@ -1431,19 +1433,17 @@
irq_ptr->ccw.count = irq_ptr->aqueue.count;
irq_ptr->ccw.cda = 0;
- spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+ spin_lock_irq(get_ccwdev_lock(cdev));
ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
0, DOIO_DENY_PREFETCH);
+ spin_unlock_irq(get_ccwdev_lock(cdev));
if (rc) {
DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no);
DBF_ERROR("rc:%4x", rc);
- }
- spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
- if (rc)
goto out;
+ }
if (is_thinint_irq(irq_ptr))
tiqdio_add_input_queues(irq_ptr);
@@ -1585,10 +1585,11 @@
/* in case of SIGA errors we must process the error immediately */
if (used >= q->u.out.scan_threshold || rc)
- tasklet_schedule(&q->tasklet);
+ qdio_tasklet_schedule(q);
else
/* free the SBALs in case of no further traffic */
- if (!timer_pending(&q->u.out.timer))
+ if (!timer_pending(&q->u.out.timer) &&
+ likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
mod_timer(&q->u.out.timer, jiffies + HZ);
return rc;
}
diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile
index 241891a..df40692 100644
--- a/drivers/s390/virtio/Makefile
+++ b/drivers/s390/virtio/Makefile
@@ -6,4 +6,8 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 1d060fd..5e5c11f 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -458,6 +458,8 @@
if (test_devices_support(total_memory_size) < 0)
return -ENODEV;
+ pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
if (rc)
return rc;
@@ -482,7 +484,7 @@
}
/* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
{
char scratch[17];
unsigned int len = count;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index bf85974..17d04c7 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -10410,8 +10410,11 @@
__ipr_remove(pdev);
return rc;
}
+ spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
+ ioa_cfg->scan_enabled = 1;
+ schedule_work(&ioa_cfg->work_q);
+ spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
- scsi_scan_host(ioa_cfg->host);
ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
@@ -10421,10 +10424,8 @@
}
}
- spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
- ioa_cfg->scan_enabled = 1;
- schedule_work(&ioa_cfg->work_q);
- spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
+ scsi_scan_host(ioa_cfg->host);
+
return 0;
}
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c
index 1b4ff0f..ed5dd0e 100644
--- a/drivers/thermal/clock_cooling.c
+++ b/drivers/thermal/clock_cooling.c
@@ -426,6 +426,7 @@
if (!ccdev)
return ERR_PTR(-ENOMEM);
+ mutex_init(&ccdev->lock);
ccdev->dev = dev;
ccdev->clk = devm_clk_get(dev, clock_name);
if (IS_ERR(ccdev->clk))
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 34fe365..68bd1b5 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -116,7 +116,9 @@
instance->target = get_target_state(tz, cdev, percentage,
cur_trip_level);
+ mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false;
+ mutex_unlock(&instance->cdev->lock);
thermal_cdev_update(cdev);
}
return 0;
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c
index fc52016..bb118a1 100644
--- a/drivers/thermal/gov_bang_bang.c
+++ b/drivers/thermal/gov_bang_bang.c
@@ -71,7 +71,9 @@
dev_dbg(&instance->cdev->device, "target=%d\n",
(int)instance->target);
+ mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
+ mutex_unlock(&instance->cdev->lock);
}
mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 6a6ec1c..9b4815e 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/thermal.h>
+#include <linux/pm.h>
/* Intel PCH thermal Device IDs */
#define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */
@@ -65,6 +66,7 @@
unsigned long crt_temp;
int hot_trip_id;
unsigned long hot_temp;
+ bool bios_enabled;
};
static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
@@ -75,8 +77,10 @@
*nr_trips = 0;
/* Check if BIOS has already enabled thermal sensor */
- if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))
+ if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) {
+ ptd->bios_enabled = true;
goto read_trips;
+ }
tsel = readb(ptd->hw_base + WPT_TSEL);
/*
@@ -130,9 +134,39 @@
return 0;
}
+static int pch_wpt_suspend(struct pch_thermal_device *ptd)
+{
+ u8 tsel;
+
+ if (ptd->bios_enabled)
+ return 0;
+
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+
+ writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+
+ return 0;
+}
+
+static int pch_wpt_resume(struct pch_thermal_device *ptd)
+{
+ u8 tsel;
+
+ if (ptd->bios_enabled)
+ return 0;
+
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+
+ writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+
+ return 0;
+}
+
struct pch_dev_ops {
int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
+ int (*suspend)(struct pch_thermal_device *ptd);
+ int (*resume)(struct pch_thermal_device *ptd);
};
@@ -140,6 +174,8 @@
static const struct pch_dev_ops pch_dev_ops_wpt = {
.hw_init = pch_wpt_init,
.get_temp = pch_wpt_get_temp,
+ .suspend = pch_wpt_suspend,
+ .resume = pch_wpt_resume,
};
static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
@@ -269,6 +305,22 @@
pci_disable_device(pdev);
}
+static int intel_pch_thermal_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+ return ptd->ops->suspend(ptd);
+}
+
+static int intel_pch_thermal_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+ return ptd->ops->resume(ptd);
+}
+
static struct pci_device_id intel_pch_thermal_id[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
@@ -276,11 +328,17 @@
};
MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
+static const struct dev_pm_ops intel_pch_pm_ops = {
+ .suspend = intel_pch_thermal_suspend,
+ .resume = intel_pch_thermal_resume,
+};
+
static struct pci_driver intel_pch_thermal_driver = {
.name = "intel_pch_thermal",
.id_table = intel_pch_thermal_id,
.probe = intel_pch_thermal_probe,
.remove = intel_pch_thermal_remove,
+ .driver.pm = &intel_pch_pm_ops,
};
module_pci_driver(intel_pch_thermal_driver);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 015ce2e..0e4dc0a 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -388,7 +388,7 @@
int sleeptime;
unsigned long target_jiffies;
unsigned int guard;
- unsigned int compensation = 0;
+ unsigned int compensated_ratio;
int interval; /* jiffies to sleep for each attempt */
unsigned int duration_jiffies = msecs_to_jiffies(duration);
unsigned int window_size_now;
@@ -409,8 +409,11 @@
* c-states, thus we need to compensate the injected idle ratio
* to achieve the actual target reported by the HW.
*/
- compensation = get_compensation(target_ratio);
- interval = duration_jiffies*100/(target_ratio+compensation);
+ compensated_ratio = target_ratio +
+ get_compensation(target_ratio);
+ if (compensated_ratio <= 0)
+ compensated_ratio = 1;
+ interval = duration_jiffies * 100 / compensated_ratio;
/* align idle time */
target_jiffies = roundup(jiffies, interval);
@@ -647,8 +650,8 @@
goto exit_set;
} else if (set_target_ratio > 0 && new_target_ratio == 0) {
pr_info("Stop forced idle injection\n");
- set_target_ratio = 0;
end_power_clamp();
+ set_target_ratio = 0;
} else /* adjust currently running */ {
set_target_ratio = new_target_ratio;
/* make new set_target_ratio visible to other cpus */
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index 2f1a863..b4d3116 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -529,7 +529,9 @@
continue;
instance->target = 0;
+ mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false;
+ mutex_unlock(&instance->cdev->lock);
thermal_cdev_update(instance->cdev);
}
}
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index ea9366a..bcef2e7 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -175,7 +175,9 @@
update_passive_instance(tz, trip_type, -1);
instance->initialized = true;
+ mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
+ mutex_unlock(&instance->cdev->lock);
}
mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 5133cd1..e2fc616 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1093,7 +1093,9 @@
return ret;
instance->target = state;
+ mutex_lock(&cdev->lock);
cdev->updated = false;
+ mutex_unlock(&cdev->lock);
thermal_cdev_update(cdev);
return 0;
@@ -1623,11 +1625,13 @@
struct thermal_instance *instance;
unsigned long target = 0;
- /* cooling device is updated*/
- if (cdev->updated)
- return;
-
mutex_lock(&cdev->lock);
+ /* cooling device is updated*/
+ if (cdev->updated) {
+ mutex_unlock(&cdev->lock);
+ return;
+ }
+
/* Make sure cdev enters the deepest cooling state */
list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
dev_dbg(&cdev->device, "zone%d->target=%lu\n",
@@ -1637,9 +1641,9 @@
if (instance->target > target)
target = instance->target;
}
- mutex_unlock(&cdev->lock);
cdev->ops->set_cur_state(cdev, target);
cdev->updated = true;
+ mutex_unlock(&cdev->lock);
trace_cdev_update(cdev, target);
dev_dbg(&cdev->device, "set to state %lu\n", target);
}
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c
index 06fd2ed9..c41c774 100644
--- a/drivers/thermal/thermal_hwmon.c
+++ b/drivers/thermal/thermal_hwmon.c
@@ -232,6 +232,7 @@
return result;
}
+EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs);
void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
{
@@ -270,3 +271,4 @@
hwmon_device_unregister(hwmon->device);
kfree(hwmon);
}
+EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 15ecfc9..152b438 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -564,67 +564,80 @@
}
static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
- uint32_t flags, void *data)
+ unsigned int count, uint32_t flags,
+ void *data)
{
- int32_t fd = *(int32_t *)data;
-
- if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
- return -EINVAL;
-
/* DATA_NONE/DATA_BOOL enables loopback testing */
if (flags & VFIO_IRQ_SET_DATA_NONE) {
- if (*ctx)
- eventfd_signal(*ctx, 1);
- return 0;
+ if (*ctx) {
+ if (count) {
+ eventfd_signal(*ctx, 1);
+ } else {
+ eventfd_ctx_put(*ctx);
+ *ctx = NULL;
+ }
+ return 0;
+ }
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
- uint8_t trigger = *(uint8_t *)data;
+ uint8_t trigger;
+
+ if (!count)
+ return -EINVAL;
+
+ trigger = *(uint8_t *)data;
if (trigger && *ctx)
eventfd_signal(*ctx, 1);
+
+ return 0;
+ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ int32_t fd;
+
+ if (!count)
+ return -EINVAL;
+
+ fd = *(int32_t *)data;
+ if (fd == -1) {
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+ *ctx = NULL;
+ } else if (fd >= 0) {
+ struct eventfd_ctx *efdctx;
+
+ efdctx = eventfd_ctx_fdget(fd);
+ if (IS_ERR(efdctx))
+ return PTR_ERR(efdctx);
+
+ if (*ctx)
+ eventfd_ctx_put(*ctx);
+
+ *ctx = efdctx;
+ }
return 0;
}
- /* Handle SET_DATA_EVENTFD */
- if (fd == -1) {
- if (*ctx)
- eventfd_ctx_put(*ctx);
- *ctx = NULL;
- return 0;
- } else if (fd >= 0) {
- struct eventfd_ctx *efdctx;
- efdctx = eventfd_ctx_fdget(fd);
- if (IS_ERR(efdctx))
- return PTR_ERR(efdctx);
- if (*ctx)
- eventfd_ctx_put(*ctx);
- *ctx = efdctx;
- return 0;
- } else
- return -EINVAL;
+ return -EINVAL;
}
static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
- if (index != VFIO_PCI_ERR_IRQ_INDEX)
+ if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
return -EINVAL;
- /*
- * We should sanitize start & count, but that wasn't caught
- * originally, so this IRQ index must forever ignore them :-(
- */
-
- return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
+ return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
+ count, flags, data);
}
static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
- if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
+ if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
return -EINVAL;
- return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
+ return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
+ count, flags, data);
}
int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 388eec4..97fb2f8 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -220,20 +220,20 @@
{
void *priv = NULL;
long err;
- struct vhost_memory *memory;
+ struct vhost_umem *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
if (err)
goto done;
- memory = vhost_dev_reset_owner_prepare();
- if (!memory) {
+ umem = vhost_dev_reset_owner_prepare();
+ if (!umem) {
err = -ENOMEM;
goto done;
}
vhost_test_stop(n, &priv);
vhost_test_flush(n);
- vhost_dev_reset_owner(&n->dev, memory);
+ vhost_dev_reset_owner(&n->dev, umem);
done:
mutex_unlock(&n->dev.mutex);
return err;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0ddf3a2..e3b30ea 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -307,6 +307,8 @@
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
+ u32 len;
+
if (!vhost_vsock_more_replies(vsock)) {
/* Stop tx until the device processes already
* pending replies. Leave tx virtqueue
@@ -334,13 +336,15 @@
continue;
}
+ len = pkt->len;
+
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
virtio_transport_recv_pkt(pkt);
else
virtio_transport_free_pkt(pkt);
- vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
added = true;
}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 114a0c8..e383ecd 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -327,6 +327,8 @@
* host should service the ring ASAP. */
if (out_sgs)
vq->notify(&vq->vq);
+ if (indirect)
+ kfree(desc);
END_USE(vq);
return -ENOSPC;
}
@@ -426,6 +428,7 @@
if (indirect)
kfree(desc);
+ END_USE(vq);
return -EIO;
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6..85737e9 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,11 +189,8 @@
case 1:
_debug("extract FID count");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@
_debug("extract FID array");
ret = afs_extract_data(call, skb, last, call->buffer,
call->count * 3 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
_debug("unmarshall FID array");
call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@
case 3:
_debug("extract CB count");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
tmp = ntohl(call->tmp);
_debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@
_debug("extract CB array");
ret = afs_extract_data(call, skb, last, call->request,
call->count * 3 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
_debug("unmarshall CB array");
cb = call->request;
@@ -278,9 +266,9 @@
call->unmarshall++;
case 5:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* Record that the message was unmarshalled successfully so
* that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@
break;
}
- if (!last)
- return 0;
call->state = AFS_CALL_REPLYING;
@@ -335,13 +321,13 @@
{
struct afs_server *server;
struct in_addr addr;
+ int ret;
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@
_enter(",{%u},%d", skb->len, last);
+ /* There are some arguments that we ignore */
+ afs_data_consumed(call, skb);
if (!last)
- return 0;
+ return -EAGAIN;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@
static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
bool last)
{
+ int ret;
+
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
switch (call->unmarshall) {
case 0:
@@ -509,8 +497,9 @@
break;
}
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
call->state = AFS_CALL_REPLYING;
@@ -588,12 +577,13 @@
static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
struct sk_buff *skb, bool last)
{
+ int ret;
+
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG;
- if (!last)
- return 0;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930e..9312b92 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -240,15 +240,13 @@
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -335,11 +333,8 @@
case 1:
_debug("extract data length (MSW)");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@
case 2:
_debug("extract data length");
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@
ret = afs_extract_data(call, skb, last, buffer,
call->count);
kunmap_atomic(buffer);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
call->offset = 0;
@@ -389,11 +378,8 @@
case 4:
ret = afs_extract_data(call, skb, last, call->buffer,
(21 + 3 + 6) * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
bp = call->buffer;
xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@
call->unmarshall++;
case 5:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
break;
}
- if (!last)
- return 0;
-
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
@@ -537,9 +520,8 @@
{
_enter(",{%u},%d", skb->len, last);
- if (skb->len > 0)
- return -EBADMSG; /* shouldn't be any reply data */
- return 0;
+ /* shouldn't be any reply data */
+ return afs_data_complete(call, skb, last);
}
/*
@@ -622,15 +604,13 @@
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -721,15 +701,13 @@
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -804,15 +782,13 @@
{
struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -892,15 +868,13 @@
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -999,15 +973,13 @@
{
struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -1105,20 +1077,13 @@
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last) {
- _leave(" = 0 [more]");
- return 0;
- }
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
@@ -1292,20 +1257,13 @@
afs_dataversion_t *store_version;
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
+ int ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last) {
- _leave(" = 0 [more]");
- return 0;
- }
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
store_version = NULL;
@@ -1504,11 +1462,8 @@
_debug("extract status");
ret = afs_extract_data(call, skb, last, call->buffer,
12 * 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
bp = call->buffer;
xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@
/* extract the volume name length */
case 2:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1561,11 +1510,8 @@
case 4:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
@@ -1574,11 +1520,8 @@
/* extract the offline message length */
case 5:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1617,11 +1557,8 @@
case 7:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
@@ -1630,11 +1567,8 @@
/* extract the message of the day length */
case 8:
ret = afs_extract_data(call, skb, last, &call->tmp, 4);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->count = ntohl(call->tmp);
_debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@
if (call->count > 0) {
ret = afs_extract_data(call, skb, last, call->reply3,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
}
p = call->reply3;
@@ -1673,26 +1604,20 @@
case 10:
ret = afs_extract_data(call, skb, last, call->buffer,
call->count);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
- }
+ if (ret < 0)
+ return ret;
call->offset = 0;
call->unmarshall++;
no_motd_padding:
case 11:
- _debug("trailer %d", skb->len);
- if (skb->len != 0)
- return -EBADMSG;
+ ret = afs_data_complete(call, skb, last);
+ if (ret < 0)
+ return ret;
break;
}
- if (!last)
- return 0;
-
_leave(" = 0 [done]");
return 0;
}
@@ -1764,15 +1689,13 @@
struct sk_buff *skb, bool last)
{
const __be32 *bp;
+ int ret;
_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982..df976b2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -609,17 +609,29 @@
*/
extern int afs_open_socket(void);
extern void afs_close_socket(void);
+extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
const struct afs_wait_mode *);
extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
size_t, size_t);
extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
extern void afs_send_empty_reply(struct afs_call *);
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
size_t);
+static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
+ bool last)
+{
+ if (skb->len > 0)
+ return -EBADMSG;
+ afs_data_consumed(call, skb);
+ if (!last)
+ return -EAGAIN;
+ return 0;
+}
+
/*
* security.c
*/
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 4832de84..14d04c8 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -150,10 +150,9 @@
}
/*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Note that the data in a socket buffer is now consumed.
*/
-static void afs_data_delivered(struct sk_buff *skb)
+void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
{
if (!skb) {
_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@
} else {
_debug("DLVR %p{%u} [%d]",
skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- if (atomic_dec_return(&afs_outstanding_skbs) == -1)
- BUG();
- rxrpc_kernel_data_delivered(skb);
+ rxrpc_kernel_data_consumed(call->rxcall, skb);
}
}
@@ -489,9 +486,15 @@
last = rxrpc_kernel_is_data_last(skb);
ret = call->type->deliver(call, skb, last);
switch (ret) {
+ case -EAGAIN:
+ if (last) {
+ _debug("short data");
+ goto unmarshal_error;
+ }
+ break;
case 0:
- if (last &&
- call->state == AFS_CALL_AWAIT_REPLY)
+ ASSERT(last);
+ if (call->state == AFS_CALL_AWAIT_REPLY)
call->state = AFS_CALL_COMPLETE;
break;
case -ENOTCONN:
@@ -501,6 +504,7 @@
abort_code = RX_INVALID_OPERATION;
goto do_abort;
default:
+ unmarshal_error:
abort_code = RXGEN_CC_UNMARSHAL;
if (call->state != AFS_CALL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@
call->state = AFS_CALL_ERROR;
break;
}
- afs_data_delivered(skb);
- skb = NULL;
- continue;
+ break;
case RXRPC_SKB_MARK_FINAL_ACK:
_debug("Rcv ACK");
call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@
}
/*
- * empty a socket buffer into a flat reply buffer
+ * Empty a socket buffer into a flat reply buffer.
*/
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
{
size_t len = skb->len;
- if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
- BUG();
- call->reply_size += len;
+ if (len > call->reply_max - call->reply_size) {
+ _leave(" = -EBADMSG [%zu > %u]",
+ len, call->reply_max - call->reply_size);
+ return -EBADMSG;
+ }
+
+ if (len > 0) {
+ if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
+ len) < 0)
+ BUG();
+ call->reply_size += len;
+ }
+
+ afs_data_consumed(call, skb);
+ if (!last)
+ return -EAGAIN;
+
+ if (call->reply_size != call->reply_max) {
+ _leave(" = -EBADMSG [%u != %u]",
+ call->reply_size, call->reply_max);
+ return -EBADMSG;
+ }
+ return 0;
}
/*
@@ -745,7 +767,8 @@
}
/*
- * grab the operation ID from an incoming cache manager call
+ * Grab the operation ID from an incoming cache manager call. The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
*/
static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
bool last)
@@ -766,12 +789,9 @@
call->offset += len;
if (call->offset < 4) {
- if (last) {
- _leave(" = -EBADMSG [op ID short]");
- return -EBADMSG;
- }
- _leave(" = 0 [incomplete]");
- return 0;
+ afs_data_consumed(call, skb);
+ _leave(" = -EAGAIN");
+ return -EAGAIN;
}
call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@
}
/*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
*/
int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@
call->offset += len;
if (call->offset < count) {
- if (last) {
- _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
- return -EBADMSG;
- }
+ afs_data_consumed(call, skb);
_leave(" = -EAGAIN");
return -EAGAIN;
}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0..f94d1ab 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -64,16 +64,13 @@
struct afs_cache_vlocation *entry;
__be32 *bp;
u32 tmp;
- int loop;
+ int loop, ret;
_enter(",,%u", last);
- afs_transfer_reply(call, skb);
- if (!last)
- return 0;
-
- if (call->reply_size != call->reply_max)
- return -EBADMSG;
+ ret = afs_transfer_reply(call, skb, last);
+ if (ret < 0)
+ return ret;
/* unmarshall the reply once we've received all of it */
entry = call->reply;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b6d210e..d9ddcfc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -862,33 +862,6 @@
return 0;
}
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes)
-{
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_head *ref_head;
- int ret = 0;
-
- if (!fs_info->quota_enabled || !is_fstree(ref_root))
- return 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- spin_lock(&delayed_refs->lock);
- ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
- if (!ref_head) {
- ret = -ENOENT;
- goto out;
- }
- WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
- ref_head->qgroup_ref_root = ref_root;
- ref_head->qgroup_reserved = num_bytes;
-out:
- spin_unlock(&delayed_refs->lock);
- return ret;
-}
-
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5fca953..43f3629 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@
u64 parent, u64 ref_root,
u64 owner, u64 offset, u64 reserved, int action,
struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 ref_root, u64 bytenr, u64 num_bytes);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9404121..5842423 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2033,6 +2033,14 @@
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
+ /*
+ * An ordered extent might have started before and completed
+ * already with io errors, in which case the inode was not
+ * updated and we end up here. So check the inode's mapping
+ * flags for any errors that might have happened while doing
+ * writeback of file data.
+ */
+ ret = btrfs_inode_check_errors(inode);
inode_unlock(inode);
goto out;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2f59759..08dfc57 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3435,10 +3435,10 @@
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
ret = PTR_ERR_OR_ZERO(inode);
- if (ret && ret != -ESTALE)
+ if (ret && ret != -ENOENT)
goto out;
- if (ret == -ESTALE && root == root->fs_info->tree_root) {
+ if (ret == -ENOENT && root == root->fs_info->tree_root) {
struct btrfs_root *dead_root;
struct btrfs_fs_info *fs_info = root->fs_info;
int is_dead_root = 0;
@@ -3474,7 +3474,7 @@
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
*/
- if (ret == -ESTALE) {
+ if (ret == -ENOENT) {
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -3633,7 +3633,7 @@
/*
* read an inode from the btree into the in-memory inode
*/
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -3652,14 +3652,19 @@
filled = true;
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ ret = -ENOMEM;
goto make_bad;
+ }
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
- if (ret)
+ if (ret) {
+ if (ret > 0)
+ ret = -ENOENT;
goto make_bad;
+ }
leaf = path->nodes[0];
@@ -3812,11 +3817,12 @@
}
btrfs_update_iflags(inode);
- return;
+ return 0;
make_bad:
btrfs_free_path(path);
make_bad_inode(inode);
+ return ret;
}
/*
@@ -4204,6 +4210,7 @@
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
+ u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -4226,11 +4233,27 @@
if (err)
goto out;
+ last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
/* now the directory is empty */
err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
dentry->d_name.name, dentry->d_name.len);
- if (!err)
+ if (!err) {
btrfs_i_size_write(inode, 0);
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to
+ * its parent directory. This is to prevent an unrecoverable
+ * log tree in the case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ */
+ if (last_unlink_trans >= trans->transid)
+ BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+ }
out:
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
@@ -5606,7 +5629,9 @@
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- btrfs_read_locked_inode(inode);
+ int ret;
+
+ ret = btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
unlock_new_inode(inode);
@@ -5615,7 +5640,8 @@
} else {
unlock_new_inode(inode);
iput(inode);
- inode = ERR_PTR(-ESTALE);
+ ASSERT(ret < 0);
+ inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
}
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd29..efe129f 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@
u64 parent_ino;
u64 ino;
u64 gen;
- bool is_orphan;
struct list_head update_refs;
};
@@ -274,6 +273,39 @@
char name[];
};
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+ enum btrfs_compare_tree_result result,
+ const char *what)
+{
+ const char *result_string;
+
+ switch (result) {
+ case BTRFS_COMPARE_TREE_NEW:
+ result_string = "new";
+ break;
+ case BTRFS_COMPARE_TREE_DELETED:
+ result_string = "deleted";
+ break;
+ case BTRFS_COMPARE_TREE_CHANGED:
+ result_string = "updated";
+ break;
+ case BTRFS_COMPARE_TREE_SAME:
+ ASSERT(0);
+ result_string = "unchanged";
+ break;
+ default:
+ ASSERT(0);
+ result_string = "unexpected";
+ }
+
+ btrfs_err(sctx->send_root->fs_info,
+ "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+ result_string, what, sctx->cmp_key->objectid,
+ sctx->send_root->root_key.objectid,
+ (sctx->parent_root ?
+ sctx->parent_root->root_key.objectid : 0));
+}
+
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@
* was already unlinked/moved, so we can safely assume that we will not
* overwrite anything at this point in time.
*/
- if (other_inode > sctx->send_progress) {
+ if (other_inode > sctx->send_progress ||
+ is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, NULL, NULL, NULL, NULL);
if (ret < 0)
@@ -2502,6 +2535,8 @@
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret < 0)
goto out;
@@ -2947,6 +2982,10 @@
}
if (loc.objectid > send_progress) {
+ struct orphan_dir_info *odi;
+
+ odi = get_orphan_dir_info(sctx, dir);
+ free_orphan_dir_info(sctx, odi);
ret = 0;
goto out;
}
@@ -3047,7 +3086,6 @@
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
- pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@
return NULL;
}
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+ u64 ino, u64 gen, u64 *ancestor_ino)
+{
+ int ret = 0;
+ u64 parent_inode = 0;
+ u64 parent_gen = 0;
+ u64 start_ino = ino;
+
+ *ancestor_ino = 0;
+ while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+ fs_path_reset(name);
+
+ if (is_waiting_for_rm(sctx, ino))
+ break;
+ if (is_waiting_for_move(sctx, ino)) {
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ ret = get_first_ref(sctx->parent_root, ino,
+ &parent_inode, &parent_gen, name);
+ } else {
+ ret = __get_cur_name_and_parent(sctx, ino, gen,
+ &parent_inode,
+ &parent_gen, name);
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret < 0)
+ break;
+ if (parent_inode == start_ino) {
+ ret = 1;
+ if (*ancestor_ino == 0)
+ *ancestor_ino = ino;
+ break;
+ }
+ ino = parent_inode;
+ gen = parent_gen;
+ }
+ return ret;
+}
+
static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@
u64 parent_ino, parent_gen;
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
+ u64 ancestor;
+ bool is_orphan;
int ret;
name = fs_path_alloc();
@@ -3135,9 +3217,10 @@
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
rmdir_ino = dm->rmdir_ino;
+ is_orphan = dm->orphanized;
free_waiting_dir_move(sctx, dm);
- if (pm->is_orphan) {
+ if (is_orphan) {
ret = gen_unique_name(sctx, pm->ino,
pm->gen, from_path);
} else {
@@ -3155,6 +3238,24 @@
goto out;
sctx->send_progress = sctx->cur_ino + 1;
+ ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ LIST_HEAD(deleted_refs);
+ ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+ ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+ &pm->update_refs, &deleted_refs,
+ is_orphan);
+ if (ret < 0)
+ goto out;
+ if (rmdir_ino) {
+ dm = get_waiting_dir_move(sctx, pm->ino);
+ ASSERT(dm);
+ dm->rmdir_ino = rmdir_ino;
+ }
+ goto out;
+ }
fs_path_reset(name);
to_path = name;
name = NULL;
@@ -3174,7 +3275,7 @@
/* already deleted */
goto finish;
}
- ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+ ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
if (ret < 0)
goto out;
if (!ret)
@@ -3204,8 +3305,18 @@
* and old parent(s).
*/
list_for_each_entry(cur, &pm->update_refs, list) {
- if (cur->dir == rmdir_ino)
+ /*
+ * The parent inode might have been deleted in the send snapshot
+ */
+ ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ if (ret == -ENOENT) {
+ ret = 0;
continue;
+ }
+ if (ret < 0)
+ goto out;
+
ret = send_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
@@ -3325,6 +3436,7 @@
u64 left_gen;
u64 right_gen;
int ret = 0;
+ struct waiting_dir_move *wdm;
if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
return 0;
@@ -3383,7 +3495,8 @@
goto out;
}
- if (is_waiting_for_move(sctx, di_key.objectid)) {
+ wdm = get_waiting_dir_move(sctx, di_key.objectid);
+ if (wdm && !wdm->orphanized) {
ret = add_pending_dir_move(sctx,
sctx->cur_ino,
sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@
ret = is_ancestor(sctx->parent_root,
sctx->cur_ino, sctx->cur_inode_gen,
ino, path_before);
- break;
+ if (ret)
+ break;
}
fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@
goto out;
if (ret) {
struct name_cache_entry *nce;
+ struct waiting_dir_move *wdm;
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
goto out;
+
+ /*
+ * If ow_inode has its rename operation delayed
+ * make sure that its orphanized name is used in
+ * the source path when performing its rename
+ * operation.
+ */
+ if (is_waiting_for_move(sctx, ow_inode)) {
+ wdm = get_waiting_dir_move(sctx,
+ ow_inode);
+ ASSERT(wdm);
+ wdm->orphanized = true;
+ }
+
/*
* Make sure we clear our orphanized inode's
* name from the name cache. This is because the
@@ -3663,6 +3792,19 @@
name_cache_delete(sctx, nce);
kfree(nce);
}
+
+ /*
+ * ow_inode might currently be an ancestor of
+ * cur_ino, therefore compute valid_path (the
+ * current path of cur_ino) again because it
+ * might contain the pre-orphanization name of
+ * ow_inode, which is no longer valid.
+ */
+ fs_path_reset(valid_path);
+ ret = get_cur_path(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen, valid_path);
+ if (ret < 0)
+ goto out;
} else {
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
@@ -5602,7 +5744,10 @@
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "reference");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen &&
sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "xattr");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@
{
int ret = 0;
- BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+ if (sctx->cur_ino != sctx->cmp_key->objectid) {
+ inconsistent_snapshot_error(sctx, result, "extent");
+ return -EIO;
+ }
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d31a0c4..fff3f3e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4469,7 +4469,8 @@
static int btrfs_check_ref_name_override(struct extent_buffer *eb,
const int slot,
const struct btrfs_key *key,
- struct inode *inode)
+ struct inode *inode,
+ u64 *other_ino)
{
int ret;
struct btrfs_path *search_path;
@@ -4528,7 +4529,16 @@
search_path, parent,
name, this_name_len, 0);
if (di && !IS_ERR(di)) {
- ret = 1;
+ struct btrfs_key di_key;
+
+ btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+ di, &di_key);
+ if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+ ret = 1;
+ *other_ino = di_key.objectid;
+ } else {
+ ret = -EAGAIN;
+ }
goto out;
} else if (IS_ERR(di)) {
ret = PTR_ERR(di);
@@ -4722,16 +4732,71 @@
if ((min_key.type == BTRFS_INODE_REF_KEY ||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
BTRFS_I(inode)->generation == trans->transid) {
+ u64 other_ino = 0;
+
ret = btrfs_check_ref_name_override(path->nodes[0],
path->slots[0],
- &min_key, inode);
+ &min_key, inode,
+ &other_ino);
if (ret < 0) {
err = ret;
goto out_unlock;
} else if (ret > 0) {
- err = 1;
- btrfs_set_log_full_commit(root->fs_info, trans);
- goto out_unlock;
+ struct btrfs_key inode_key;
+ struct inode *other_inode;
+
+ if (ins_nr > 0) {
+ ins_nr++;
+ } else {
+ ins_nr = 1;
+ ins_start_slot = path->slots[0];
+ }
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
+ ins_nr, inode_only,
+ logged_isize);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ }
+ ins_nr = 0;
+ btrfs_release_path(path);
+ inode_key.objectid = other_ino;
+ inode_key.type = BTRFS_INODE_ITEM_KEY;
+ inode_key.offset = 0;
+ other_inode = btrfs_iget(root->fs_info->sb,
+ &inode_key, root,
+ NULL);
+ /*
+ * If the other inode that had a conflicting dir
+ * entry was deleted in the current transaction,
+ * we don't need to do more work nor fallback to
+ * a transaction commit.
+ */
+ if (IS_ERR(other_inode) &&
+ PTR_ERR(other_inode) == -ENOENT) {
+ goto next_key;
+ } else if (IS_ERR(other_inode)) {
+ err = PTR_ERR(other_inode);
+ goto out_unlock;
+ }
+ /*
+ * We are safe logging the other inode without
+ * acquiring its i_mutex as long as we log with
+ * the LOG_INODE_EXISTS mode. We're safe against
+ * concurrent renames of the other inode as well
+ * because during a rename we pin the log and
+ * update the log with the new name before we
+ * unpin it.
+ */
+ err = btrfs_log_inode(trans, root, other_inode,
+ LOG_INODE_EXISTS,
+ 0, LLONG_MAX, ctx);
+ iput(other_inode);
+ if (err)
+ goto out_unlock;
+ else
+ goto next_key;
}
}
@@ -4799,7 +4864,7 @@
ins_nr = 0;
}
btrfs_release_path(path);
-
+next_key:
if (min_key.offset < (u64)-1) {
min_key.offset++;
} else if (min_key.type < max_key.type) {
@@ -4993,8 +5058,12 @@
if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
break;
- if (IS_ROOT(parent))
+ if (IS_ROOT(parent)) {
+ inode = d_inode(parent);
+ if (btrfs_must_commit_transaction(trans, inode))
+ ret = 1;
break;
+ }
parent = dget_parent(parent);
dput(old_parent);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 99115ca..16e6ded 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1347,9 +1347,12 @@
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- struct ceph_mds_session *session = *psession;
+ struct ceph_mds_session *session = NULL;
int mds;
+
dout("ceph_flush_snaps %p\n", inode);
+ if (psession)
+ session = *psession;
retry:
spin_lock(&ci->i_ceph_lock);
if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa59a85..f72d4ae 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2759,6 +2759,7 @@
} else {
path = NULL;
pathlen = 0;
+ pathbase = 0;
}
spin_lock(&ci->i_ceph_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4d09d44..05713a5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1949,6 +1949,12 @@
{
struct backing_dev_info *bdi;
+ /*
+ * If we are expecting writeback progress we must submit plugged IO.
+ */
+ if (blk_needs_flush_plug(current))
+ blk_schedule_flush_plug(current);
+
if (!nr_pages)
nr_pages = get_nr_dirty_pages();
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 33da841..6f47527 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -338,6 +338,8 @@
case 0:
break;
case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 324bfdc..9bf64ea 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -396,6 +396,10 @@
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+ unsigned long lease,
+ unsigned long lastrenewed);
+
/* nfs4state.c */
struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a036e93..1949bbd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4237,12 +4237,9 @@
err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
if (err == 0) {
- struct nfs_client *clp = server->nfs_client;
-
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo->lease_time * HZ;
- clp->cl_last_renewal = now;
- spin_unlock(&clp->cl_lock);
+ nfs4_set_lease_period(server->nfs_client,
+ fsinfo->lease_time * HZ,
+ now);
break;
}
err = nfs4_handle_exception(server, err, &exception);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index e1ba58c..82e7719 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -136,6 +136,26 @@
cancel_delayed_work_sync(&clp->cl_renewd);
}
+/**
+ * nfs4_set_lease_period - Sets the lease period on a nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period
+ * @lastrenewed: time at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+ unsigned long lease,
+ unsigned long lastrenewed)
+{
+ spin_lock(&clp->cl_lock);
+ clp->cl_lease_time = lease;
+ clp->cl_last_renewal = lastrenewed;
+ spin_unlock(&clp->cl_lock);
+
+ /* Cap maximum reconnect timeout at 1/2 lease period */
+ rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
/*
* Local variables:
* c-basic-offset: 8
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 834b875..cada00a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -277,20 +277,17 @@
{
int status;
struct nfs_fsinfo fsinfo;
+ unsigned long now;
if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
nfs4_schedule_state_renewal(clp);
return 0;
}
+ now = jiffies;
status = nfs4_proc_get_lease_time(clp, &fsinfo);
if (status == 0) {
- /* Update lease time and schedule renewal */
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo.lease_time * HZ;
- clp->cl_last_renewal = jiffies;
- spin_unlock(&clp->cl_lock);
-
+ nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
nfs4_schedule_state_renewal(clp);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8410ca2..a204d7e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4903,6 +4903,32 @@
return nfs_ok;
}
+static __be32
+nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
+{
+ struct nfs4_ol_stateid *stp = openlockstateid(s);
+ __be32 ret;
+
+ mutex_lock(&stp->st_mutex);
+
+ ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ if (ret)
+ goto out;
+
+ ret = nfserr_locks_held;
+ if (check_for_locks(stp->st_stid.sc_file,
+ lockowner(stp->st_stateowner)))
+ goto out;
+
+ release_lock_stateid(stp);
+ ret = nfs_ok;
+
+out:
+ mutex_unlock(&stp->st_mutex);
+ nfs4_put_stid(s);
+ return ret;
+}
+
__be32
nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_free_stateid *free_stateid)
@@ -4910,7 +4936,6 @@
stateid_t *stateid = &free_stateid->fr_stateid;
struct nfs4_stid *s;
struct nfs4_delegation *dp;
- struct nfs4_ol_stateid *stp;
struct nfs4_client *cl = cstate->session->se_client;
__be32 ret = nfserr_bad_stateid;
@@ -4929,18 +4954,9 @@
ret = nfserr_locks_held;
break;
case NFS4_LOCK_STID:
- ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
- if (ret)
- break;
- stp = openlockstateid(s);
- ret = nfserr_locks_held;
- if (check_for_locks(stp->st_stid.sc_file,
- lockowner(stp->st_stateowner)))
- break;
- WARN_ON(!unhash_lock_stateid(stp));
+ atomic_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
+ ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
case NFS4_REVOKED_DELEG_STID:
dp = delegstateid(s);
@@ -5507,7 +5523,7 @@
lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *ost,
struct nfsd4_lock *lock,
- struct nfs4_ol_stateid **lst, bool *new)
+ struct nfs4_ol_stateid **plst, bool *new)
{
__be32 status;
struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5515,7 +5531,9 @@
struct nfs4_client *cl = oo->oo_owner.so_client;
struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
struct nfs4_lockowner *lo;
+ struct nfs4_ol_stateid *lst;
unsigned int strhashval;
+ bool hashed;
lo = find_lockowner_str(cl, &lock->lk_new_owner);
if (!lo) {
@@ -5531,12 +5549,27 @@
goto out;
}
- *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
- if (*lst == NULL) {
+retry:
+ lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+ if (lst == NULL) {
status = nfserr_jukebox;
goto out;
}
+
+ mutex_lock(&lst->st_mutex);
+
+ /* See if it's still hashed to avoid race with FREE_STATEID */
+ spin_lock(&cl->cl_lock);
+ hashed = !list_empty(&lst->st_perfile);
+ spin_unlock(&cl->cl_lock);
+
+ if (!hashed) {
+ mutex_unlock(&lst->st_mutex);
+ nfs4_put_stid(&lst->st_stid);
+ goto retry;
+ }
status = nfs_ok;
+ *plst = lst;
out:
nfs4_put_stateowner(&lo->lo_owner);
return status;
@@ -5603,8 +5636,6 @@
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
- if (status == nfs_ok)
- mutex_lock(&lock_stp->st_mutex);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ba944123..ff476e6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1252,10 +1252,13 @@
if (IS_ERR(dchild))
return nfserrno(host_err);
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
- if (err) {
- dput(dchild);
+ /*
+ * We unconditionally drop our ref to dchild as fh_compose will have
+ * already grabbed its own ref for it.
+ */
+ dput(dchild);
+ if (err)
return err;
- }
return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
rdev, resfhp);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 4b32928..4ebe6b2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -144,10 +144,8 @@
struct page *page = buf->page;
if (page_count(page) == 1) {
- if (memcg_kmem_enabled()) {
+ if (memcg_kmem_enabled())
memcg_kmem_uncharge(page, 0);
- __ClearPageKmemcg(page);
- }
__SetPageLocked(page);
return 0;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 09e18fd..b9a8c81 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -46,7 +46,7 @@
cached = 0;
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 54a8e65..7d026bf 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -25,7 +25,20 @@
#include <asm-generic/qrwlock_types.h>
/*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ * | +0 | +1 | +2 | +3 |
+ * ----+----+----+----+----+
+ * LE | 78 | 56 | 34 | 12 | 0x12345678
+ * ----+----+----+----+----+
+ * | wr | rd |
+ * +----+----+----+----+
+ *
+ * ----+----+----+----+----+
+ * BE | 12 | 34 | 56 | 78 | 0x12345678
+ * ----+----+----+----+----+
+ * | rd | wr |
+ * +----+----+----+----+
*/
#define _QW_WAITING 1 /* A writer is waiting */
#define _QW_LOCKED 0xff /* A writer holds the lock */
@@ -134,12 +147,22 @@
}
/**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+ return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
+/**
* queued_write_unlock - release write lock of a queue rwlock
* @lock : Pointer to queue rwlock structure
*/
static inline void queued_write_unlock(struct qrwlock *lock)
{
- smp_store_release((u8 *)&lock->cnts, 0);
+ smp_store_release(__qrwlock_write_byte(lock), 0);
}
/*
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4348d6d..99c6d01 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -962,6 +962,7 @@
*
* @bo: A pointer to a struct ttm_buffer_object.
* @evict: 1: This is an eviction. Don't try to pipeline.
+ * @interruptible: Sleep interruptible if waiting.
* @no_wait_gpu: Return immediately if the GPU is busy.
* @new_mem: struct ttm_mem_reg indicating where to move.
*
@@ -976,7 +977,7 @@
*/
extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
- bool evict, bool no_wait_gpu,
+ bool evict, bool interruptible, bool no_wait_gpu,
struct ttm_mem_reg *new_mem);
/**
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 701b64a..89b65b8 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -74,7 +74,8 @@
"Attempted to advance past end of bvec iter\n");
while (bytes) {
- unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+ unsigned iter_len = bvec_iter_len(bv, *iter);
+ unsigned len = min(bytes, iter_len);
bytes -= len;
iter->bi_size -= len;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 56b0b7e..99ac022 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -337,6 +337,7 @@
*/
#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
#define E_ITS_MAPD_DEVICE_OOR 0x010801
#define E_ITS_MAPC_PROCNUM_OOR 0x010902
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01e908a..9c28b4d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1113,9 +1113,21 @@
/* create, destroy, and name are mandatory */
struct kvm_device_ops {
const char *name;
+
+ /*
+ * create is called holding kvm->lock and any operations not suitable
+ * to do while holding the lock should be deferred to init (see
+ * below).
+ */
int (*create)(struct kvm_device *dev, u32 type);
/*
+ * init is called after create if create is successful and is called
+ * outside of holding kvm->lock.
+ */
+ void (*init)(struct kvm_device *dev);
+
+ /*
* Destroy is responsible for freeing dev.
*
* Destroy may be called before or after destructors are called
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f2e4e90..d572b78 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -68,8 +68,10 @@
#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
+# define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
+# define is_migrate_cma_page(_page) false
#endif
#define for_each_migratetype_order(order, type) \
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4f0bfe5..e8c81fb 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -270,6 +270,8 @@
MSI_FLAG_MULTI_PCI_MSI = (1 << 2),
/* Support PCI MSIX interrupts */
MSI_FLAG_PCI_MSIX = (1 << 3),
+ /* Needs early activate, required for PCI */
+ MSI_FLAG_ACTIVATE_EARLY = (1 << 4),
};
int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 076df53..3a788bf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3891,8 +3891,7 @@
extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
void netdev_rss_key_fill(void *buffer, size_t len);
-int dev_get_nest_level(struct net_device *dev,
- bool (*type_check)(const struct net_device *dev));
+int dev_get_nest_level(struct net_device *dev);
int skb_checksum_help(struct sk_buff *skb);
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8ed43261..2b6b43c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,7 +743,9 @@
u64 parent_gen;
u64 generation;
int pin_count;
+#ifdef CONFIG_CGROUP_PERF
int nr_cgroups; /* cgroup evts */
+#endif
void *task_ctx_data; /* pmu specific data */
struct rcu_head rcu_head;
};
@@ -769,7 +771,9 @@
unsigned int hrtimer_active;
struct pmu *unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
+#endif
};
struct perf_output_handle {
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8dc155d..696a56b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -266,39 +266,21 @@
* and other debug macros are compiled out unless either DEBUG is defined
* or CONFIG_DYNAMIC_DEBUG is set.
*/
-
-#ifdef CONFIG_PRINTK
-
-asmlinkage __printf(1, 2) __cold void __pr_emerg(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_alert(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_crit(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_err(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_warn(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_notice(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_info(const char *fmt, ...);
-
-#define pr_emerg(fmt, ...) __pr_emerg(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...) __pr_alert(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...) __pr_crit(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...) __pr_err(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...) __pr_warn(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...) __pr_notice(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...) __pr_info(pr_fmt(fmt), ##__VA_ARGS__)
-
-#else
-
-#define pr_emerg(fmt, ...) printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...) printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...) printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...) printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
-#endif
-
-#define pr_warning pr_warn
-
+#define pr_emerg(fmt, ...) \
+ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
/*
* Like KERN_CONT, pr_cont() should only be used when continuing
* a line with no newline ('\n') enclosed. Otherwise it defaults
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b1e3c57..d6c4177 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -70,8 +70,16 @@
u8 max_tc;
};
+enum qed_dcbx_sf_ieee_type {
+ QED_DCBX_SF_IEEE_ETHTYPE,
+ QED_DCBX_SF_IEEE_TCP_PORT,
+ QED_DCBX_SF_IEEE_UDP_PORT,
+ QED_DCBX_SF_IEEE_TCP_UDP_PORT
+};
+
struct qed_app_entry {
bool ethtype;
+ enum qed_dcbx_sf_ieee_type sf_ieee;
bool enabled;
u8 prio;
u16 proto_id;
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index de1f643..fcb4c36 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -705,70 +705,6 @@
sctp_authhdr_t auth_hdr;
} __packed sctp_auth_chunk_t;
-struct sctp_info {
- __u32 sctpi_tag;
- __u32 sctpi_state;
- __u32 sctpi_rwnd;
- __u16 sctpi_unackdata;
- __u16 sctpi_penddata;
- __u16 sctpi_instrms;
- __u16 sctpi_outstrms;
- __u32 sctpi_fragmentation_point;
- __u32 sctpi_inqueue;
- __u32 sctpi_outqueue;
- __u32 sctpi_overall_error;
- __u32 sctpi_max_burst;
- __u32 sctpi_maxseg;
- __u32 sctpi_peer_rwnd;
- __u32 sctpi_peer_tag;
- __u8 sctpi_peer_capable;
- __u8 sctpi_peer_sack;
- __u16 __reserved1;
-
- /* assoc status info */
- __u64 sctpi_isacks;
- __u64 sctpi_osacks;
- __u64 sctpi_opackets;
- __u64 sctpi_ipackets;
- __u64 sctpi_rtxchunks;
- __u64 sctpi_outofseqtsns;
- __u64 sctpi_idupchunks;
- __u64 sctpi_gapcnt;
- __u64 sctpi_ouodchunks;
- __u64 sctpi_iuodchunks;
- __u64 sctpi_oodchunks;
- __u64 sctpi_iodchunks;
- __u64 sctpi_octrlchunks;
- __u64 sctpi_ictrlchunks;
-
- /* primary transport info */
- struct sockaddr_storage sctpi_p_address;
- __s32 sctpi_p_state;
- __u32 sctpi_p_cwnd;
- __u32 sctpi_p_srtt;
- __u32 sctpi_p_rto;
- __u32 sctpi_p_hbinterval;
- __u32 sctpi_p_pathmaxrxt;
- __u32 sctpi_p_sackdelay;
- __u32 sctpi_p_sackfreq;
- __u32 sctpi_p_ssthresh;
- __u32 sctpi_p_partial_bytes_acked;
- __u32 sctpi_p_flight_size;
- __u16 sctpi_p_error;
- __u16 __reserved2;
-
- /* sctp sock info */
- __u32 sctpi_s_autoclose;
- __u32 sctpi_s_adaptation_ind;
- __u32 sctpi_s_pd_point;
- __u8 sctpi_s_nodelay;
- __u8 sctpi_s_disable_fragments;
- __u8 sctpi_s_v4mapped;
- __u8 sctpi_s_frag_interleave;
- __u32 sctpi_s_type;
- __u32 __reserved3;
-};
-
struct sctp_infox {
struct sctp_info *sctpinfo;
struct sctp_association *asoc;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b3e0..0f665cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2847,6 +2847,18 @@
__skb_linearize(skb) : 0;
}
+static __always_inline void
+__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+ unsigned int off)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_block_sub(skb->csum,
+ csum_partial(start, len, 0), off);
+ else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start_offset(skb) < 0)
+ skb->ip_summed = CHECKSUM_NONE;
+}
+
/**
* skb_postpull_rcsum - update checksum for received skb after pull
* @skb: buffer to update
@@ -2857,36 +2869,38 @@
* update the CHECKSUM_COMPLETE checksum, or set ip_summed to
* CHECKSUM_NONE so that it can be recomputed from scratch.
*/
-
static inline void skb_postpull_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
{
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
- else if (skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_start_offset(skb) < 0)
- skb->ip_summed = CHECKSUM_NONE;
+ __skb_postpull_rcsum(skb, start, len, 0);
}
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+static __always_inline void
+__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+ unsigned int off)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_block_add(skb->csum,
+ csum_partial(start, len, 0), off);
+}
+/**
+ * skb_postpush_rcsum - update checksum for received skb after push
+ * @skb: buffer to update
+ * @start: start of data after push
+ * @len: length of data pushed
+ *
+ * After doing a push on a received packet, you need to call this to
+ * update the CHECKSUM_COMPLETE checksum.
+ */
static inline void skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
{
- /* For performing the reverse operation to skb_postpull_rcsum(),
- * we can instead of ...
- *
- * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
- *
- * ... just use this equivalent version here to save a few
- * instructions. Feeding csum of 0 in csum_partial() and later
- * on adding skb->csum is equivalent to feed skb->csum in the
- * first place.
- */
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_partial(start, len, skb->csum);
+ __skb_postpush_rcsum(skb, start, len, 0);
}
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
/**
* skb_push_rcsum - push skb and update receive checksum
* @skb: buffer to update
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1a4ea55..4293808 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -155,6 +155,18 @@
void kzfree(const void *);
size_t ksize(const void *);
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page);
+#else
+static inline const char *__check_heap_object(const void *ptr,
+ unsigned long n,
+ struct page *page)
+{
+ return NULL;
+}
+#endif
+
/*
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b6810c9..5c02b06 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -195,6 +195,8 @@
struct rpc_xprt *,
void *),
void *data);
+void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+ unsigned long timeo);
const char *rpc_proc_name(const struct rpc_task *task);
#endif /* __KERNEL__ */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 5e3e1b6..a16070d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -218,7 +218,8 @@
struct work_struct task_cleanup;
struct timer_list timer;
unsigned long last_used,
- idle_timeout;
+ idle_timeout,
+ max_reconnect_timeout;
/*
* Send stuff
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 352b154..cbd8990 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -105,6 +105,30 @@
#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_HARDENED_USERCOPY
+extern void __check_object_size(const void *ptr, unsigned long n,
+ bool to_user);
+
+static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ __check_object_size(ptr, n, to_user);
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+ bool to_user)
+{ }
+#endif /* CONFIG_HARDENED_USERCOPY */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 3495578..f30c187 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -114,8 +114,8 @@
#ifndef user_access_begin
#define user_access_begin() do { } while (0)
#define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr) __get_user(x, ptr)
-#define unsafe_put_user(x, ptr) __put_user(x, ptr)
+#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
+#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
#endif
#endif /* __LINUX_UACCESS_H__ */
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 41e6a24..82f3c91 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -176,8 +176,8 @@
int tcf_unregister_action(struct tc_action_ops *a,
struct pernet_operations *ops);
int tcf_action_destroy(struct list_head *actions, int bind);
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
- struct tcf_result *res);
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+ int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct nlattr *nla,
struct nlattr *est, char *n, int ovr,
int bind, struct list_head *);
@@ -189,30 +189,17 @@
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
-#define tc_no_actions(_exts) \
- (list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
- list_for_each_entry(a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
- (list_is_singular(&(_exts)->actions))
+#endif /* CONFIG_NET_CLS_ACT */
static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
u64 packets, u64 lastuse)
{
+#ifdef CONFIG_NET_CLS_ACT
if (!a->ops->stats_update)
return;
a->ops->stats_update(a, bytes, packets, lastuse);
+#endif
}
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
-#define tc_single_action(_exts) false
-#define tcf_action_stats_update(a, bytes, packets, lastuse)
-
-#endif /* CONFIG_NET_CLS_ACT */
#endif
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ac1bc3c..7b0f886 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -40,12 +40,12 @@
unsigned long,
gfp_t);
int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
void rxrpc_kernel_end_call(struct rxrpc_call *);
bool rxrpc_kernel_is_data_last(struct sk_buff *);
u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_data_delivered(struct sk_buff *);
void rxrpc_kernel_free_skb(struct sk_buff *);
struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
int rxrpc_kernel_reject_call(struct socket *);
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..73ea256 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -104,6 +104,7 @@
skb_push(skb, hdr_len);
+ skb_set_inner_protocol(skb, proto);
skb_reset_transport_header(skb);
greh = (struct gre_base_hdr *)skb->data;
greh->flags = gre_tnl_flags_to_gre_flags(flags);
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 0dc0a51..dce2d58 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -128,7 +128,8 @@
to = from | htonl(INET_ECN_CE << 20);
*(__be32 *)iph = to;
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->csum = csum_add(csum_sub(skb->csum, from), to);
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+ (__force __wsum)to);
return 1;
}
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b4faadb..cca510a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3620,7 +3620,8 @@
int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
- u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
+ u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
+ struct ieee80211_sta *sta);
int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
int *dbm);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6f8d653..c99508d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -59,7 +59,8 @@
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
- struct list_head actions;
+ int nr_actions;
+ struct tc_action **actions;
#endif
/* Map to export classifier specific extension TLV types to the
* generic extensions API. Unsupported extensions must be set to 0.
@@ -72,7 +73,10 @@
{
#ifdef CONFIG_NET_CLS_ACT
exts->type = 0;
- INIT_LIST_HEAD(&exts->actions);
+ exts->nr_actions = 0;
+ exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+ GFP_KERNEL);
+ WARN_ON(!exts->actions); /* TODO: propagate the error to callers */
#endif
exts->action = action;
exts->police = police;
@@ -89,7 +93,7 @@
tcf_exts_is_predicative(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- return !list_empty(&exts->actions);
+ return exts->nr_actions;
#else
return 0;
#endif
@@ -108,6 +112,20 @@
return tcf_exts_is_predicative(exts);
}
+/**
+ * tcf_exts_to_list - collect a filter's actions onto a caller-supplied list
+ * @exts: tc filter extensions whose actions[] array is walked
+ * @actions: list head to append to (assumed to be initialised by the caller)
+ *
+ * Links the first nr_actions entries of exts->actions onto @actions through
+ * each action's ->list node. Entries are appended at the tail so that
+ * walking @actions visits them in array order; inserting at the head would
+ * hand callers the actions reversed. Does nothing when CONFIG_NET_CLS_ACT
+ * is not set.
+ */
+static inline void tcf_exts_to_list(const struct tcf_exts *exts,
+				    struct list_head *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	int i;
+
+	for (i = 0; i < exts->nr_actions; i++) {
+		struct tc_action *a = exts->actions[i];
+
+		list_add_tail(&a->list, actions);
+	}
+#endif
+}
+
/**
* tcf_exts_exec - execute tc filter extensions
* @skb: socket buffer
@@ -124,12 +142,25 @@
struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
- if (!list_empty(&exts->actions))
- return tcf_action_exec(skb, &exts->actions, res);
+ if (exts->nr_actions)
+ return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+ res);
#endif
return 0;
}
+#ifdef CONFIG_NET_CLS_ACT
+
+#define tc_no_actions(_exts) ((_exts)->nr_actions == 0)
+#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
+
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_single_action(_exts) false
+
+#endif /* CONFIG_NET_CLS_ACT */
+
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
struct tcf_exts *exts, bool ovr);
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 5144013..28c5da6 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -330,24 +330,32 @@
#ifdef CONFIG_NO_HZ_COMMON
#define TICK_DEP_NAMES \
- tick_dep_name(NONE) \
+ tick_dep_mask_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)
#undef tick_dep_name
+#undef tick_dep_mask_name
#undef tick_dep_name_end
-#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* The MASK will convert to their bits and they need to be processed too */
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* NONE only has a mask defined for it */
+#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
TICK_DEP_NAMES
#undef tick_dep_name
+#undef tick_dep_mask_name
#undef tick_dep_name_end
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
#define show_tick_dep_name(val) \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..9e5fc16 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -339,7 +339,7 @@
BPF_FUNC_skb_change_type,
/**
- * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
- BPF_FUNC_skb_in_cgroup,
+ BPF_FUNC_skb_under_cgroup,
/**
* bpf_get_hash_recalc(skb)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 01751fa..c674ba2 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -24,7 +24,7 @@
__NFT_REG_MAX,
NFT_REG32_00 = 8,
- MFT_REG32_01,
+ NFT_REG32_01,
NFT_REG32_02,
NFT_REG32_03,
NFT_REG32_04,
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index d304f4c..a406adc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -944,4 +944,68 @@
__u16 pr_policy;
};
+struct sctp_info {
+ __u32 sctpi_tag;
+ __u32 sctpi_state;
+ __u32 sctpi_rwnd;
+ __u16 sctpi_unackdata;
+ __u16 sctpi_penddata;
+ __u16 sctpi_instrms;
+ __u16 sctpi_outstrms;
+ __u32 sctpi_fragmentation_point;
+ __u32 sctpi_inqueue;
+ __u32 sctpi_outqueue;
+ __u32 sctpi_overall_error;
+ __u32 sctpi_max_burst;
+ __u32 sctpi_maxseg;
+ __u32 sctpi_peer_rwnd;
+ __u32 sctpi_peer_tag;
+ __u8 sctpi_peer_capable;
+ __u8 sctpi_peer_sack;
+ __u16 __reserved1;
+
+ /* assoc status info */
+ __u64 sctpi_isacks;
+ __u64 sctpi_osacks;
+ __u64 sctpi_opackets;
+ __u64 sctpi_ipackets;
+ __u64 sctpi_rtxchunks;
+ __u64 sctpi_outofseqtsns;
+ __u64 sctpi_idupchunks;
+ __u64 sctpi_gapcnt;
+ __u64 sctpi_ouodchunks;
+ __u64 sctpi_iuodchunks;
+ __u64 sctpi_oodchunks;
+ __u64 sctpi_iodchunks;
+ __u64 sctpi_octrlchunks;
+ __u64 sctpi_ictrlchunks;
+
+ /* primary transport info */
+ struct sockaddr_storage sctpi_p_address;
+ __s32 sctpi_p_state;
+ __u32 sctpi_p_cwnd;
+ __u32 sctpi_p_srtt;
+ __u32 sctpi_p_rto;
+ __u32 sctpi_p_hbinterval;
+ __u32 sctpi_p_pathmaxrxt;
+ __u32 sctpi_p_sackdelay;
+ __u32 sctpi_p_sackfreq;
+ __u32 sctpi_p_ssthresh;
+ __u32 sctpi_p_partial_bytes_acked;
+ __u32 sctpi_p_flight_size;
+ __u16 sctpi_p_error;
+ __u16 __reserved2;
+
+ /* sctp sock info */
+ __u32 sctpi_s_autoclose;
+ __u32 sctpi_s_adaptation_ind;
+ __u32 sctpi_s_pd_point;
+ __u8 sctpi_s_nodelay;
+ __u8 sctpi_s_disable_fragments;
+ __u8 sctpi_s_v4mapped;
+ __u8 sctpi_s_frag_interleave;
+ __u32 sctpi_s_type;
+ __u32 __reserved3;
+};
+
#endif /* _UAPI_SCTP_H */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 6b011c1..1d57ed3 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -32,7 +32,7 @@
*/
#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
#include <linux/types.h>
#include <linux/virtio_ids.h>
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index cbae529..180d526 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -136,8 +136,8 @@
*
* Of course the contents will be ABI, but that's up the AFU driver.
*/
- size_t data_size;
- u8 data[];
+ __u32 data_size;
+ __u8 data[];
};
struct cxl_event {
diff --git a/init/Kconfig b/init/Kconfig
index 6988649..cac3f09 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1761,6 +1761,7 @@
config SLAB
bool "SLAB"
+ select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
@@ -1768,6 +1769,7 @@
config SLUB
bool "SLUB (Unqueued Allocator)"
+ select HAVE_HARDENED_USERCOPY_ALLOCATOR
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650..570eeca 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@
struct bucket *buckets;
void *elems;
struct pcpu_freelist freelist;
+ void __percpu *extra_elems;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
};
+enum extra_elem_state {
+ HTAB_NOT_AN_EXTRA_ELEM = 0,
+ HTAB_EXTRA_ELEM_FREE,
+ HTAB_EXTRA_ELEM_USED
+};
+
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
@@ -38,7 +45,10 @@
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
};
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ enum extra_elem_state state;
+ };
u32 hash;
char key[0] __aligned(8);
};
@@ -113,6 +123,23 @@
return err;
}
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+ void __percpu *pptr;
+ int cpu;
+
+ pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+ if (!pptr)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+ HTAB_EXTRA_ELEM_FREE;
+ }
+ htab->extra_elems = pptr;
+ return 0;
+}
+
/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
@@ -185,6 +212,8 @@
if (percpu)
cost += (u64) round_up(htab->map.value_size, 8) *
num_possible_cpus() * htab->map.max_entries;
+ else
+ cost += (u64) htab->elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@
raw_spin_lock_init(&htab->buckets[i].lock);
}
- if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
- err = prealloc_elems_and_freelist(htab);
+ if (!percpu) {
+ err = alloc_extra_elems(htab);
if (err)
goto free_buckets;
}
+ if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
+ err = prealloc_elems_and_freelist(htab);
+ if (err)
+ goto free_extra_elems;
+ }
+
return &htab->map;
+free_extra_elems:
+ free_percpu(htab->extra_elems);
free_buckets:
kvfree(htab->buckets);
free_htab:
@@ -349,7 +386,6 @@
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
kfree(l);
-
}
static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
+ if (l->state == HTAB_EXTRA_ELEM_USED) {
+ l->state = HTAB_EXTRA_ELEM_FREE;
+ return;
+ }
+
if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
@@ -381,25 +422,44 @@
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
- bool percpu, bool onallcpus)
+ bool percpu, bool onallcpus,
+ bool old_elem_exists)
{
u32 size = htab->map.value_size;
bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
struct htab_elem *l_new;
void __percpu *pptr;
+ int err = 0;
if (prealloc) {
l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
if (!l_new)
- return ERR_PTR(-E2BIG);
+ err = -E2BIG;
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
- return ERR_PTR(-E2BIG);
+ err = -E2BIG;
+ } else {
+ l_new = kmalloc(htab->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!l_new)
+ return ERR_PTR(-ENOMEM);
}
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
- if (!l_new)
- return ERR_PTR(-ENOMEM);
+ }
+
+ if (err) {
+ if (!old_elem_exists)
+ return ERR_PTR(err);
+
+ /* if we're updating the existing element and the hash table
+ * is full, use per-cpu extra elems
+ */
+ l_new = this_cpu_ptr(htab->extra_elems);
+ if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+ return ERR_PTR(-E2BIG);
+ l_new->state = HTAB_EXTRA_ELEM_USED;
+ } else {
+ l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
}
memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@
if (ret)
goto err;
- l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+ !!l_old);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true, onallcpus);
+ hash, true, onallcpus, false);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -652,6 +713,7 @@
htab_free_elems(htab);
pcpu_freelist_destroy(&htab->freelist);
}
+ free_percpu(htab->extra_elems);
kvfree(htab->buckets);
kfree(htab);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b..daea765 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
+ u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
};
@@ -1052,7 +1053,7 @@
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_in_cgroup)
+ if (func_id != BPF_FUNC_skb_under_cgroup)
goto error;
break;
default:
@@ -1074,7 +1075,7 @@
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
- case BPF_FUNC_skb_in_cgroup:
+ case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
break;
@@ -1301,7 +1302,7 @@
/* dst_reg stays as pkt_ptr type and since some positive
* integer value was added to the pointer, increment its 'id'
*/
- dst_reg->id++;
+ dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range and off to zero */
dst_reg->off = 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a19550d..1903b8f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -843,6 +843,32 @@
}
}
}
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+ struct perf_event_context *ctx, bool add)
+{
+ struct perf_cpu_context *cpuctx;
+
+ if (!is_cgroup_event(event))
+ return;
+
+ if (add && ctx->nr_cgroups++)
+ return;
+ else if (!add && --ctx->nr_cgroups)
+ return;
+ /*
+ * Because cgroup events are always per-cpu events,
+ * this will always be called from the right CPU.
+ */
+ cpuctx = __get_cpu_context(ctx);
+ cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
#else /* !CONFIG_CGROUP_PERF */
static inline bool
@@ -920,6 +946,13 @@
struct perf_event_context *ctx)
{
}
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+ struct perf_event_context *ctx, bool add)
+{
+}
+
#endif
/*
@@ -1392,6 +1425,7 @@
static void
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
{
+
lockdep_assert_held(&ctx->lock);
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@
list_add_tail(&event->group_entry, list);
}
- if (is_cgroup_event(event))
- ctx->nr_cgroups++;
+ list_update_cgroup_event(event, ctx, true);
list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
@@ -1581,8 +1614,6 @@
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
- struct perf_cpu_context *cpuctx;
-
WARN_ON_ONCE(event->ctx != ctx);
lockdep_assert_held(&ctx->lock);
@@ -1594,20 +1625,7 @@
event->attach_state &= ~PERF_ATTACH_CONTEXT;
- if (is_cgroup_event(event)) {
- ctx->nr_cgroups--;
- /*
- * Because cgroup events are always per-cpu events, this will
- * always be called from the right CPU.
- */
- cpuctx = __get_cpu_context(ctx);
- /*
- * If there are no more cgroup events then clear cgrp to avoid
- * stale pointer in update_cgrp_time_from_cpuctx().
- */
- if (!ctx->nr_cgroups)
- cpuctx->cgrp = NULL;
- }
+ list_update_cgroup_event(event, ctx, false);
ctx->nr_events--;
if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@
static inline int
event_filter_match(struct perf_event *event)
{
- return (event->cpu == -1 || event->cpu == smp_processor_id())
- && perf_cgroup_match(event) && pmu_filter_match(event);
+ return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+ perf_cgroup_match(event) && pmu_filter_match(event);
}
static void
@@ -1737,8 +1755,8 @@
* maintained, otherwise bogus information is return
* via read() for time_enabled, time_running:
*/
- if (event->state == PERF_EVENT_STATE_INACTIVE
- && !event_filter_match(event)) {
+ if (event->state == PERF_EVENT_STATE_INACTIVE &&
+ !event_filter_match(event)) {
delta = tstamp - event->tstamp_stopped;
event->tstamp_running += delta;
event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@
lockdep_assert_held(&ctx->mutex);
- event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;
+ /*
+ * Ensures that if we can observe event->ctx, both the event and ctx
+ * will be 'complete'. See perf_iterate_sb_cpu().
+ */
+ smp_store_release(&event->ctx, ctx);
+
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
@@ -5969,6 +5992,14 @@
struct perf_event *event;
list_for_each_entry_rcu(event, &pel->list, sb_list) {
+ /*
+ * Skip events that are not fully formed yet; ensure that
+ * if we observe event->ctx, both event and ctx will be
+ * complete enough. See perf_install_in_context().
+ */
+ if (!smp_load_acquire(&event->ctx))
+ continue;
+
if (event->state < PERF_EVENT_STATE_INACTIVE)
continue;
if (!event_filter_match(event))
diff --git a/kernel/futex.c b/kernel/futex.c
index 33664f7..46cb3a3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,15 @@
* Futex flags used to encode options to functions and preserve them across
* restarts.
*/
-#define FLAGS_SHARED 0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED 0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED 0x00
+#endif
#define FLAGS_CLOCKRT 0x02
#define FLAGS_HAS_TIMEOUT 0x04
@@ -405,6 +413,16 @@
if (!key->both.ptr)
return;
+ /*
+ * On MMU less systems futexes are always "private" as there is no per
+ * process address space. We need the smp_mb() nevertheless - yes,
+ * arch/blackfin has MMU less SMP ...
+ */
+ if (!IS_ENABLED(CONFIG_MMU)) {
+ smp_mb(); /* explicit smp_mb(); (B) */
+ return;
+ }
+
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +454,9 @@
return;
}
+ if (!IS_ENABLED(CONFIG_MMU))
+ return;
+
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
iput(key->shared.inode);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 5499935..19e9dfb 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -359,6 +359,17 @@
else
dev_dbg(dev, "irq [%d-%d] for MSI\n",
virq, virq + desc->nvec_used - 1);
+ /*
+ * This flag is set by the PCI layer as we need to activate
+ * the MSI entries before the PCI layer enables MSI in the
+ * card. Otherwise the card latches a random msi message.
+ */
+ if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+ struct irq_data *irq_data;
+
+ irq_data = irq_domain_get_irq_data(domain, desc->irq);
+ irq_domain_activate_irq(irq_data);
+ }
}
return 0;
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 37649e6..8a99abf 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -450,7 +450,7 @@
goto gotlock;
}
}
- WRITE_ONCE(pn->state, vcpu_halted);
+ WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
pv_wait(&l->locked, _Q_SLOW_VAL);
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 22e0253..b9d0315 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -153,7 +153,6 @@
*/
if ((counter == qstat_pv_latency_kick) ||
(counter == qstat_pv_latency_wake)) {
- stat = 0;
if (kicks)
stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a881c6a..33c79b6 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -300,12 +300,12 @@
save_processor_state();
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
error = swsusp_arch_suspend();
+ /* Restore control flow magically appears here */
+ restore_processor_state();
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
if (error)
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
error);
- /* Restore control flow magically appears here */
- restore_processor_state();
if (!in_suspend)
events_check_enabled = false;
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 5d4505f..7fd2838 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -16,11 +16,9 @@
*/
#include <linux/percpu.h>
-typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt,
- va_list args);
+typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
-__printf(2, 0)
-int vprintk_default(int level, const char *fmt, va_list args);
+int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
#ifdef CONFIG_PRINTK_NMI
@@ -33,10 +31,9 @@
* via per-CPU variable.
*/
DECLARE_PER_CPU(printk_func_t, printk_func);
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
- return this_cpu_read(printk_func)(level, fmt, args);
+ return this_cpu_read(printk_func)(fmt, args);
}
extern atomic_t nmi_message_lost;
@@ -47,10 +44,9 @@
#else /* CONFIG_PRINTK_NMI */
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
{
- return vprintk_default(level, fmt, args);
+ return vprintk_default(fmt, args);
}
static inline int get_nmi_message_lost(void)
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index bc3eeb1..b69eb8a 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -58,7 +58,7 @@
* one writer running. But the buffer might get flushed from another
* CPU, so we need to be careful.
*/
-static int vprintk_nmi(int level, const char *fmt, va_list args)
+static int vprintk_nmi(const char *fmt, va_list args)
{
struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
int add = 0;
@@ -79,16 +79,7 @@
if (!len)
smp_rmb();
- if (level != LOGLEVEL_DEFAULT) {
- add = snprintf(s->buffer + len, sizeof(s->buffer) - len,
- KERN_SOH "%c", '0' + level);
- add += vsnprintf(s->buffer + len + add,
- sizeof(s->buffer) - len - add,
- fmt, args);
- } else {
- add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len,
- fmt, args);
- }
+ add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
/*
* Do it once again if the buffer has been flushed in the meantime.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a5ef95c..eea6dbc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1930,28 +1930,7 @@
}
EXPORT_SYMBOL(printk_emit);
-#ifdef CONFIG_PRINTK
-#define define_pr_level(func, loglevel) \
-asmlinkage __visible void func(const char *fmt, ...) \
-{ \
- va_list args; \
- \
- va_start(args, fmt); \
- vprintk_default(loglevel, fmt, args); \
- va_end(args); \
-} \
-EXPORT_SYMBOL(func)
-
-define_pr_level(__pr_emerg, LOGLEVEL_EMERG);
-define_pr_level(__pr_alert, LOGLEVEL_ALERT);
-define_pr_level(__pr_crit, LOGLEVEL_CRIT);
-define_pr_level(__pr_err, LOGLEVEL_ERR);
-define_pr_level(__pr_warn, LOGLEVEL_WARNING);
-define_pr_level(__pr_notice, LOGLEVEL_NOTICE);
-define_pr_level(__pr_info, LOGLEVEL_INFO);
-#endif
-
-int vprintk_default(int level, const char *fmt, va_list args)
+int vprintk_default(const char *fmt, va_list args)
{
int r;
@@ -1961,7 +1940,7 @@
return r;
}
#endif
- r = vprintk_emit(0, level, NULL, 0, fmt, args);
+ r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
return r;
}
@@ -1994,7 +1973,7 @@
int r;
va_start(args, fmt);
- r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args);
+ r = vprintk_func(fmt, args);
va_end(args);
return r;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe..2a906f2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
#include <linux/context_tracking.h>
#include <linux/compiler.h>
#include <linux/frame.h>
+#include <linux/prefetch.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -2972,6 +2973,23 @@
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+ struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+ prefetch(curr);
+ prefetch(&curr->exec_start);
+}
+
+/*
* Return accounted runtime for the task.
* In case the task is currently running, return the runtime plus current's
* pending runtime that have not been accounted yet.
@@ -3005,6 +3023,7 @@
* thread, breaking clock_gettime().
*/
if (task_current(rq, p) && task_on_rq_queued(p)) {
+ prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
}
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be5882..d418449 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@
if (old_idx == IDX_INVALID) {
cp->size++;
- cp->elements[cp->size - 1].dl = 0;
+ cp->elements[cp->size - 1].dl = dl;
cp->elements[cp->size - 1].cpu = cpu;
cp->elements[cpu].idx = cp->size - 1;
cpudl_change_key(cp, cp->size - 1, dl);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f65..9858266 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -508,13 +508,21 @@
*/
void account_idle_ticks(unsigned long ticks)
{
+ cputime_t cputime, steal;
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
- account_idle_time(jiffies_to_cputime(ticks));
+ cputime = jiffies_to_cputime(ticks);
+ steal = steal_account_process_time(cputime);
+
+ if (steal >= cputime)
+ return;
+
+ cputime -= steal;
+ account_idle_time(cputime);
}
/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f02..1ce8867 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@
*
* XXX figure out if select_task_rq_dl() deals with offline cpus.
*/
- if (unlikely(!rq->online))
+ if (unlikely(!rq->online)) {
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
+ rf.cookie = lockdep_pin_lock(&rq->lock);
+ }
/*
* Queueing this task back might have overloaded rq, check if we need
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eed..039de34 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@
pcfs_rq = tg->parent->cfs_rq[cpu];
cfs_rq->throttle_count = pcfs_rq->throttle_count;
- pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+ cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
}
/* conditionally throttle active cfs_rq's from put_prev_entity() */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 555670a..32bf6f7 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1496,6 +1496,7 @@
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
u64 expires = KTIME_MAX;
unsigned long nextevt;
+ bool is_max_delta;
/*
* Pretend that there is no timer pending if the cpu is offline.
@@ -1506,6 +1507,7 @@
spin_lock(&base->lock);
nextevt = __next_timer_interrupt(base);
+ is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
base->next_expiry = nextevt;
/*
* We have a fresh next event. Check whether we can forward the base:
@@ -1519,7 +1521,8 @@
expires = basem;
base->is_idle = false;
} else {
- expires = basem + (nextevt - basej) * TICK_NSEC;
+ if (!is_max_delta)
+ expires = basem + (nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle:
*/
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ff..5ba520b 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 128UL
+#define BUCKET_LOCKS_PER_CPU 32UL
static u32 head_hashfn(struct rhashtable *ht,
const struct bucket_table *tbl,
@@ -70,7 +70,7 @@
unsigned int nr_pcpus = num_possible_cpus();
#endif
- nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+ nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
/* Never allocate more than 0.5 locks per bucket */
@@ -83,6 +83,9 @@
tbl->locks = vmalloc(size * sizeof(spinlock_t));
else
#endif
+ if (gfp != GFP_KERNEL)
+ gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
gfp);
if (!tbl->locks)
@@ -321,12 +324,14 @@
static int rhashtable_shrink(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
- unsigned int size;
+ unsigned int nelems = atomic_read(&ht->nelems);
+ unsigned int size = 0;
int err;
ASSERT_RHT_MUTEX(ht);
- size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+ if (nelems)
+ size = roundup_pow_of_two(nelems * 3 / 2);
if (size < ht->p.min_size)
size = ht->p.min_size;
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33f655e..9c5fe81 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -40,8 +40,8 @@
unsigned long c, data;
/* Fall back to byte-at-a-time if we get a page fault */
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- break;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
*(unsigned long *)(dst+res) = c;
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
@@ -56,8 +56,7 @@
while (max) {
char c;
- if (unlikely(unsafe_get_user(c,src+res)))
- return -EFAULT;
+ unsafe_get_user(c,src+res, efault);
dst[res] = c;
if (!c)
return res;
@@ -76,6 +75,7 @@
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's an EFAULT.
*/
+efault:
return -EFAULT;
}
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 2625943..8e105ed 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@
src -= align;
max += align;
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)src, efault);
c |= aligned_byte_mask(align);
for (;;) {
@@ -61,8 +60,7 @@
if (unlikely(max <= sizeof(unsigned long)))
break;
max -= sizeof(unsigned long);
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
}
res -= align;
@@ -77,6 +75,7 @@
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's 0.
*/
+efault:
return 0;
}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5..64e899b 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@
static int max_size = 0;
module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
static bool shrinking = false;
module_param(shrinking, bool, 0);
diff --git a/mm/Makefile b/mm/Makefile
index fc05966..2ca1faf 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -21,6 +21,9 @@
KCOV_INSTRUMENT_mmzone.o := n
KCOV_INSTRUMENT_vmstat.o := n
+# Since __builtin_frame_address does work as used, disable the warning.
+CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -99,3 +102,4 @@
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b9aa1b0..87e11d8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1448,6 +1448,7 @@
list_del(&page->lru);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
+ h->max_huge_pages--;
update_and_free_page(h, page);
}
spin_unlock(&hugetlb_lock);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index b6728a3..baabaad 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -217,11 +217,8 @@
new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
QUARANTINE_FRACTION;
percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
- if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
- "Too little memory, disabling global KASAN quarantine.\n"))
- new_quarantine_size = 0;
- else
- new_quarantine_size -= percpu_quarantines;
+ new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+ 0 : new_quarantine_size - percpu_quarantines;
WRITE_ONCE(quarantine_size, new_quarantine_size);
last = global_quarantine.head;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66beca1..2ff0289 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2337,8 +2337,11 @@
return 0;
memcg = get_mem_cgroup_from_mm(current->mm);
- if (!mem_cgroup_is_root(memcg))
+ if (!mem_cgroup_is_root(memcg)) {
ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
+ if (!ret)
+ __SetPageKmemcg(page);
+ }
css_put(&memcg->css);
return ret;
}
@@ -2365,6 +2368,11 @@
page_counter_uncharge(&memcg->memsw, nr_pages);
page->mem_cgroup = NULL;
+
+ /* slab pages do not have PageKmemcg flag set */
+ if (PageKmemcg(page))
+ __ClearPageKmemcg(page);
+
css_put_many(&memcg->css, nr_pages);
}
#endif /* !CONFIG_SLOB */
@@ -4069,14 +4077,32 @@
static DEFINE_IDR(mem_cgroup_idr);
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
{
- atomic_inc(&memcg->id.ref);
+ atomic_add(n, &memcg->id.ref);
}
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
{
- if (atomic_dec_and_test(&memcg->id.ref)) {
+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ /*
+ * The root cgroup cannot be destroyed, so its refcount must
+ * always be >= 1.
+ */
+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+ VM_BUG_ON(1);
+ break;
+ }
+ memcg = parent_mem_cgroup(memcg);
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ }
+ return memcg;
+}
+
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+{
+ if (atomic_sub_and_test(n, &memcg->id.ref)) {
idr_remove(&mem_cgroup_idr, memcg->id.id);
memcg->id.id = 0;
@@ -4085,6 +4111,16 @@
}
}
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+ mem_cgroup_id_put_many(memcg, 1);
+}
+
/**
* mem_cgroup_from_id - look up a memcg from a memcg id
* @id: the memcg id to look up
@@ -4719,6 +4755,8 @@
if (!mem_cgroup_is_root(mc.from))
page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
+ mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
/*
* we charged both to->memory and to->memsw, so we
* should uncharge to->memory.
@@ -4726,9 +4764,9 @@
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
- css_put_many(&mc.from->css, mc.moved_swap);
+ mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+ css_put_many(&mc.to->css, mc.moved_swap);
- /* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
memcg_oom_recover(from);
@@ -5537,8 +5575,10 @@
else
nr_file += nr_pages;
pgpgout++;
- } else
+ } else {
nr_kmem += 1 << compound_order(page);
+ __ClearPageKmemcg(page);
+ }
page->mem_cgroup = NULL;
} while (next != page_list);
@@ -5790,7 +5830,7 @@
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *swap_memcg;
unsigned short oldid;
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5805,16 +5845,27 @@
if (!memcg)
return;
- mem_cgroup_id_get(memcg);
- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ /*
+ * In case the memcg owning these pages has been offlined and doesn't
+ * have an ID allocated to it anymore, charge the closest online
+ * ancestor for the swap instead and transfer the memory+swap charge.
+ */
+ swap_memcg = mem_cgroup_id_get_online(memcg);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_swap_statistics(swap_memcg, true);
page->mem_cgroup = NULL;
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, 1);
+ if (memcg != swap_memcg) {
+ if (!mem_cgroup_is_root(swap_memcg))
+ page_counter_charge(&swap_memcg->memsw, 1);
+ page_counter_uncharge(&memcg->memsw, 1);
+ }
+
/*
* Interrupts should be disabled here because the caller holds the
* mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5853,11 +5904,14 @@
if (!memcg)
return 0;
- if (!mem_cgroup_is_root(memcg) &&
- !page_counter_try_charge(&memcg->swap, 1, &counter))
- return -ENOMEM;
+ memcg = mem_cgroup_id_get_online(memcg);
- mem_cgroup_id_get(memcg);
+ if (!mem_cgroup_is_root(memcg) &&
+ !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+ mem_cgroup_id_put(memcg);
+ return -ENOMEM;
+ }
+
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
mem_cgroup_swap_statistics(memcg, true);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3894b65..41266dc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1219,6 +1219,7 @@
/* init node's zones as empty zones, we don't have any present pages.*/
free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+ pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
/*
* The node we allocated has no zone fallback lists. For avoiding
@@ -1249,6 +1250,7 @@
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
arch_refresh_nodedata(nid, NULL);
+ free_percpu(pgdat->per_cpu_nodestats);
arch_free_nodedata(pgdat);
return;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7d0a275..d53a9aa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -764,7 +764,7 @@
{
struct mm_struct *mm = task->mm;
struct task_struct *p;
- bool ret;
+ bool ret = true;
/*
* Skip tasks without mm because it might have passed its exit_mm and
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fb975ce..3fbe73a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1008,10 +1008,8 @@
}
if (PageMappingFlags(page))
page->mapping = NULL;
- if (memcg_kmem_enabled() && PageKmemcg(page)) {
+ if (memcg_kmem_enabled() && PageKmemcg(page))
memcg_kmem_uncharge(page, order);
- __ClearPageKmemcg(page);
- }
if (check_free)
bad += free_pages_check(page);
if (bad)
@@ -3756,12 +3754,10 @@
}
out:
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
- if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
- __free_pages(page, order);
- page = NULL;
- } else
- __SetPageKmemcg(page);
+ if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
+ unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+ __free_pages(page, order);
+ page = NULL;
}
if (kmemcheck_enabled && page)
@@ -4064,7 +4060,7 @@
int lru;
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
for_each_zone(zone)
wmark_low += zone->watermark[WMARK_LOW];
@@ -4761,6 +4757,8 @@
}
#endif
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
#else /* CONFIG_NUMA */
static void set_zonelist_order(void)
@@ -5882,9 +5880,6 @@
zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
- pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
- / 100;
- pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
zone->zone_pgdat = pgdat;
@@ -6805,6 +6800,12 @@
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+ setup_min_unmapped_ratio();
+ setup_min_slab_ratio();
+#endif
+
return 0;
}
core_initcall(init_per_zone_wmark_min)
@@ -6846,43 +6847,58 @@
}
#ifdef CONFIG_NUMA
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
+static void setup_min_unmapped_ratio(void)
{
- struct pglist_data *pgdat;
+ pg_data_t *pgdat;
struct zone *zone;
- int rc;
-
- rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
- if (rc)
- return rc;
for_each_online_pgdat(pgdat)
- pgdat->min_slab_pages = 0;
+ pgdat->min_unmapped_pages = 0;
for_each_zone(zone)
zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
sysctl_min_unmapped_ratio) / 100;
- return 0;
}
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- struct pglist_data *pgdat;
- struct zone *zone;
int rc;
rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (rc)
return rc;
+ setup_min_unmapped_ratio();
+
+ return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+ pg_data_t *pgdat;
+ struct zone *zone;
+
for_each_online_pgdat(pgdat)
pgdat->min_slab_pages = 0;
for_each_zone(zone)
zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
sysctl_min_slab_ratio) / 100;
+}
+
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ int rc;
+
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ setup_min_slab_ratio();
+
return 0;
}
#endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 709bc83..1ef3640 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@
VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
} else {
- if (PageTransCompound(page)) {
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (PageTransCompound(page) && page_mapping(page)) {
+ VM_WARN_ON_ONCE(!PageLocked(page));
+
SetPageDoubleMap(compound_head(page));
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
@@ -1303,7 +1304,7 @@
{
int i, nr = 1;
- VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+ VM_BUG_ON_PAGE(compound && !PageHead(page), page);
lock_page_memcg(page);
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
diff --git a/mm/shmem.c b/mm/shmem.c
index 7f7748a..fd8b2b5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3975,7 +3975,9 @@
struct kobj_attribute shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
bool shmem_huge_enabled(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4008,7 @@
return false;
}
}
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
#else /* !CONFIG_SHMEM */
diff --git a/mm/slab.c b/mm/slab.c
index 261147b..b672710 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4441,6 +4441,36 @@
module_init(slab_proc_init);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page)
+{
+ struct kmem_cache *cachep;
+ unsigned int objnr;
+ unsigned long offset;
+
+ /* Find and validate object. */
+ cachep = page->slab_cache;
+ objnr = obj_to_index(cachep, page, (void *)ptr);
+ BUG_ON(objnr >= cachep->num);
+
+ /* Find offset within object. */
+ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+
+ /* Allow address range falling entirely within object size. */
+ if (offset <= cachep->object_size && n <= cachep->object_size - offset)
+ return NULL;
+
+ return cachep->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
/**
* ksize - get the actual amount of memory allocated for a given object
* @objp: Pointer to the object
diff --git a/mm/slub.c b/mm/slub.c
index 850737b..9adae58 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3629,6 +3629,7 @@
*/
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
+ LIST_HEAD(discard);
struct page *page, *h;
BUG_ON(irqs_disabled());
@@ -3636,13 +3637,16 @@
list_for_each_entry_safe(page, h, &n->partial, lru) {
if (!page->inuse) {
remove_partial(n, page);
- discard_slab(s, page);
+ list_add(&page->lru, &discard);
} else {
list_slab_objects(s, page,
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
+
+ list_for_each_entry_safe(page, h, &discard, lru)
+ discard_slab(s, page);
}
/*
@@ -3764,6 +3768,46 @@
EXPORT_SYMBOL(__kmalloc_node);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
+/*
+ * Rejects objects that are incorrectly sized.
+ *
+ * Returns NULL if check passes, otherwise const char * to name of cache
+ * to indicate an error.
+ */
+const char *__check_heap_object(const void *ptr, unsigned long n,
+ struct page *page)
+{
+ struct kmem_cache *s;
+ unsigned long offset;
+ size_t object_size;
+
+ /* Find object and usable object size. */
+ s = page->slab_cache;
+ object_size = slab_ksize(s);
+
+ /* Reject impossible pointers. */
+ if (ptr < page_address(page))
+ return s->name;
+
+ /* Find offset within object. */
+ offset = (ptr - page_address(page)) % s->size;
+
+ /* Adjust for redzone and reject if within the redzone. */
+ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
+ if (offset < s->red_left_pad)
+ return s->name;
+ offset -= s->red_left_pad;
+ }
+
+ /* Allow address range falling entirely within object size. */
+ if (offset <= object_size && n <= object_size - offset)
+ return NULL;
+
+ return s->name;
+}
+#endif /* CONFIG_HARDENED_USERCOPY */
+
static size_t __ksize(const void *object)
{
struct page *page;
diff --git a/mm/usercopy.c b/mm/usercopy.c
new file mode 100644
index 0000000..8ebae91
--- /dev/null
+++ b/mm/usercopy.c
@@ -0,0 +1,268 @@
+/*
+ * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
+ * which are designed to protect kernel memory from needless exposure
+ * and overwrite under many unintended conditions. This code is based
+ * on PAX_USERCOPY, which is:
+ *
+ * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
+ * Security Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+
+enum {
+ BAD_STACK = -1,
+ NOT_STACK = 0,
+ GOOD_FRAME,
+ GOOD_STACK,
+};
+
+/*
+ * Checks if a given pointer and length is contained by the current
+ * stack frame (if possible).
+ *
+ * Returns:
+ * NOT_STACK: not at all on the stack
+ * GOOD_FRAME: fully within a valid stack frame
+ * GOOD_STACK: fully on the stack (when can't do frame-checking)
+ * BAD_STACK: error condition (invalid stack position or bad stack frame)
+ */
+static noinline int check_stack_object(const void *obj, unsigned long len)
+{
+ const void * const stack = task_stack_page(current);
+ const void * const stackend = stack + THREAD_SIZE;
+ int ret;
+
+ /* Object is not on the stack at all. */
+ if (obj + len <= stack || stackend <= obj)
+ return NOT_STACK;
+
+ /*
+ * Reject: object partially overlaps the stack (passing the
+ * check above means at least one end is within the stack,
+ * so if this check fails, the other end is outside the stack).
+ */
+ if (obj < stack || stackend < obj + len)
+ return BAD_STACK;
+
+ /* Check if object is safely within a valid frame. */
+ ret = arch_within_stack_frames(stack, stackend, obj, len);
+ if (ret)
+ return ret;
+
+ return GOOD_STACK;
+}
+
+static void report_usercopy(const void *ptr, unsigned long len,
+ bool to_user, const char *type)
+{
+ pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
+ to_user ? "exposure" : "overwrite",
+ to_user ? "from" : "to", ptr, type ? : "unknown", len);
+ /*
+ * For greater effect, it would be nice to do do_group_exit(),
+ * but BUG() actually hooks all the lock-breaking and per-arch
+ * Oops code, so that is used here instead.
+ */
+ BUG();
+}
+
+/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
+static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
+		     unsigned long high)
+{
+	unsigned long check_low = (uintptr_t)ptr;
+	unsigned long check_high = check_low + n;
+
+	/*
+	 * Both ranges are half-open, so an object ending exactly at low
+	 * (check_high == low) is entirely below and does not overlap.
+	 */
+	return check_low < high && check_high > low;
+}
+
+/* Is this address range in the kernel text area? */
+static inline const char *check_kernel_text_object(const void *ptr,
+ unsigned long n)
+{
+ unsigned long textlow = (unsigned long)_stext;
+ unsigned long texthigh = (unsigned long)_etext;
+ unsigned long textlow_linear, texthigh_linear;
+
+ if (overlaps(ptr, n, textlow, texthigh))
+ return "<kernel text>";
+
+ /*
+ * Some architectures have virtual memory mappings with a secondary
+ * mapping of the kernel text, i.e. there is more than one virtual
+ * kernel address that points to the kernel image. This usually
+ * happens when there is a separate linear physical memory mapping, so
+ * that __va(__pa(addr)) != addr for text. This can be detected
+ * and checked:
+ */
+ textlow_linear = (unsigned long)__va(__pa(textlow));
+ /* No different mapping: we're done. */
+ if (textlow_linear == textlow)
+ return NULL;
+
+ /* Check the secondary mapping... */
+ texthigh_linear = (unsigned long)__va(__pa(texthigh));
+ if (overlaps(ptr, n, textlow_linear, texthigh_linear))
+ return "<linear kernel text>";
+
+ return NULL;
+}
+
+static inline const char *check_bogus_address(const void *ptr, unsigned long n)
+{
+ /* Reject if object wraps past end of memory. */
+ if (ptr + n < ptr)
+ return "<wrapped address>";
+
+ /* Reject if NULL or ZERO-allocation. */
+ if (ZERO_OR_NULL_PTR(ptr))
+ return "<null>";
+
+ return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ struct page *page, *endpage;
+ const void *end = ptr + n - 1;
+ bool is_reserved, is_cma;
+
+ /*
+ * Some architectures (arm64) return true for virt_addr_valid() on
+ * vmalloced addresses. Work around this by checking for vmalloc
+ * first.
+ */
+ if (is_vmalloc_addr(ptr))
+ return NULL;
+
+ if (!virt_addr_valid(ptr))
+ return NULL;
+
+ page = virt_to_head_page(ptr);
+
+ /* Check slab allocator for flags and size. */
+ if (PageSlab(page))
+ return __check_heap_object(ptr, n, page);
+
+ /*
+ * Sometimes the kernel data regions are not marked Reserved (see
+ * check below). And sometimes [_sdata,_edata) does not cover
+ * rodata and/or bss, so check each range explicitly.
+ */
+
+ /* Allow reads of kernel rodata region (if not marked as Reserved). */
+ if (ptr >= (const void *)__start_rodata &&
+ end <= (const void *)__end_rodata) {
+ if (!to_user)
+ return "<rodata>";
+ return NULL;
+ }
+
+ /* Allow kernel data region (if not marked as Reserved). */
+ if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
+ return NULL;
+
+ /* Allow kernel bss region (if not marked as Reserved). */
+ if (ptr >= (const void *)__bss_start &&
+ end <= (const void *)__bss_stop)
+ return NULL;
+
+ /* Is the object wholly within one base page? */
+ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
+ ((unsigned long)end & (unsigned long)PAGE_MASK)))
+ return NULL;
+
+ /* Allow if start and end are inside the same compound page. */
+ endpage = virt_to_head_page(end);
+ if (likely(endpage == page))
+ return NULL;
+
+ /*
+ * Allow if range is entirely either Reserved (i.e. special or
+ * device memory), or CMA. Otherwise, reject since the object spans
+ * several independently allocated pages.
+ */
+ is_reserved = PageReserved(page);
+ is_cma = is_migrate_cma_page(page);
+ if (!is_reserved && !is_cma)
+ goto reject;
+
+ for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
+ page = virt_to_head_page(ptr);
+ if (is_reserved && !PageReserved(page))
+ goto reject;
+ if (is_cma && !is_migrate_cma_page(page))
+ goto reject;
+ }
+
+ return NULL;
+
+reject:
+ return "<spans multiple pages>";
+}
+
+/*
+ * Validates that the given object is:
+ * - not bogus address
+ * - known-safe heap or stack object
+ * - not in kernel text
+ */
+void __check_object_size(const void *ptr, unsigned long n, bool to_user)
+{
+ const char *err;
+
+ /* Skip all tests if size is zero. */
+ if (!n)
+ return;
+
+ /* Check for invalid addresses. */
+ err = check_bogus_address(ptr, n);
+ if (err)
+ goto report;
+
+ /* Check for bad heap object. */
+ err = check_heap_object(ptr, n, to_user);
+ if (err)
+ goto report;
+
+ /* Check for bad stack object. */
+ switch (check_stack_object(ptr, n)) {
+ case NOT_STACK:
+ /* Object is not touching the current process stack. */
+ break;
+ case GOOD_FRAME:
+ case GOOD_STACK:
+ /*
+ * Object is either in the correct frame (when it
+ * is possible to check) or just generally on the
+ * process stack (when frame checking not available).
+ */
+ return;
+ default:
+ err = "<process stack>";
+ goto report;
+ }
+
+ /* Check for object in kernel to avoid text exposure. */
+ err = check_kernel_text_object(ptr, n);
+ if (!err)
+ return;
+
+report:
+ report_usercopy(ptr, n, to_user, err);
+}
+EXPORT_SYMBOL(__check_object_size);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 82a116b..8de138d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,7 +169,7 @@
if (err < 0)
goto out_uninit_mvrp;
- vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
+ vlan->nest_level = dev_get_nest_level(real_dev) + 1;
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_mvrp;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 4acb1d5..f24b25c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -507,8 +507,8 @@
/* wakeup anybody waiting for slots to pin pages */
wake_up(&vp_wq);
}
- kfree(in_pages);
- kfree(out_pages);
+ kvfree(in_pages);
+ kvfree(out_pages);
return err;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c18080a..cd620fa 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -267,7 +267,7 @@
/* If old entry was unassociated with any port, then delete it. */
f = __br_fdb_get(br, br->dev->dev_addr, 0);
- if (f && f->is_local && !f->dst)
+ if (f && f->is_local && !f->dst && !f->added_by_user)
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -282,7 +282,7 @@
if (!br_vlan_should_use(v))
continue;
f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
- if (f && f->is_local && !f->dst)
+ if (f && f->is_local && !f->dst && !f->added_by_user)
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, v->vid);
}
@@ -764,20 +764,25 @@
}
/* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
- __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+ const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
{
- struct net_bridge *br = source->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
bool modified = false;
/* If the port cannot learn allow only local and static entries */
- if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+ if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
!(source->state == BR_STATE_LEARNING ||
source->state == BR_STATE_FORWARDING))
return -EPERM;
+ if (!source && !(state & NUD_PERMANENT)) {
+ pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+ br->dev->name);
+ return -EINVAL;
+ }
+
fdb = fdb_find(head, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -832,22 +837,28 @@
return 0;
}
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
- const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+ struct net_bridge_port *p, const unsigned char *addr,
+ u16 nlh_flags, u16 vid)
{
int err = 0;
if (ndm->ndm_flags & NTF_USE) {
+ if (!p) {
+ pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+ br->dev->name);
+ return -EINVAL;
+ }
local_bh_disable();
rcu_read_lock();
- br_fdb_update(p->br, p, addr, vid, true);
+ br_fdb_update(br, p, addr, vid, true);
rcu_read_unlock();
local_bh_enable();
} else {
- spin_lock_bh(&p->br->hash_lock);
- err = fdb_add_entry(p, addr, ndm->ndm_state,
+ spin_lock_bh(&br->hash_lock);
+ err = fdb_add_entry(br, p, addr, ndm->ndm_state,
nlh_flags, vid);
- spin_unlock_bh(&p->br->hash_lock);
+ spin_unlock_bh(&br->hash_lock);
}
return err;
@@ -884,6 +895,7 @@
dev->name);
return -EINVAL;
}
+ br = p->br;
vg = nbp_vlan_group(p);
}
@@ -895,15 +907,9 @@
}
/* VID was specified, so use it. */
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, vid);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
} else {
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, 0);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -914,11 +920,7 @@
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (!br_vlan_should_use(v))
continue;
- if (dev->priv_flags & IFF_EBRIDGE)
- err = br_fdb_insert(br, NULL, addr, v->vid);
- else
- err = __br_fdb_add(ndm, p, addr, nlh_flags,
- v->vid);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
if (err)
goto out;
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index c83326c..ef34a02 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -574,7 +574,7 @@
put_generic_request(req);
}
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
{
struct ceph_mon_client *monc = req->monc;
struct ceph_mon_generic_request *lookup_req;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b5ec096..a97e7b5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4220,7 +4220,7 @@
pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
GFP_NOIO);
- if (!pages) {
+ if (IS_ERR(pages)) {
ceph_msg_put(m);
return NULL;
}
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index ca53c83..22fb96e 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -84,12 +84,6 @@
}
EXPORT_SYMBOL(ceph_find_or_create_string);
-static void ceph_free_string(struct rcu_head *head)
-{
- struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
- kfree(cs);
-}
-
void ceph_release_string(struct kref *ref)
{
struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
@@ -101,7 +95,7 @@
}
spin_unlock(&string_tree_lock);
- call_rcu(&cs->rcu, ceph_free_string);
+ kfree_rcu(cs, rcu);
}
EXPORT_SYMBOL(ceph_release_string);
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ce07dc..dd6ce59 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6045,8 +6045,7 @@
EXPORT_SYMBOL(netdev_lower_dev_get_private);
-int dev_get_nest_level(struct net_device *dev,
- bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
{
struct net_device *lower = NULL;
struct list_head *iter;
@@ -6056,15 +6055,12 @@
ASSERT_RTNL();
netdev_for_each_lower_dev(dev, lower, iter) {
- nest = dev_get_nest_level(lower, type_check);
+ nest = dev_get_nest_level(lower);
if (max_nest < nest)
max_nest = nest;
}
- if (type_check(dev))
- max_nest++;
-
- return max_nest;
+ return max_nest + 1;
}
EXPORT_SYMBOL(dev_get_nest_level);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5708999..cb06ace 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1355,56 +1355,47 @@
{
int err;
- if (!skb_cloned(skb))
- return 0;
- if (skb_clone_writable(skb, write_len))
- return 0;
- err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (!err)
- bpf_compute_data_end(skb);
+ err = skb_ensure_writable(skb, write_len);
+ bpf_compute_data_end(skb);
+
return err;
}
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+ if (skb_at_tc_ingress(skb))
+ skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+ if (skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
- struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- int offset = (int) r2;
+ unsigned int offset = (unsigned int) r2;
void *from = (void *) (long) r3;
unsigned int len = (unsigned int) r4;
void *ptr;
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
-
- /* bpf verifier guarantees that:
- * 'from' pointer points to bpf program stack
- * 'len' bytes of it were initialized
- * 'len' > 0
- * 'skb' is a valid pointer to 'struct sk_buff'
- *
- * so check for invalid 'offset' and too large 'len'
- */
- if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
+ if (unlikely(offset > 0xffff))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, len, sp->buff);
- if (unlikely(!ptr))
- return -EFAULT;
-
+ ptr = skb->data + offset;
if (flags & BPF_F_RECOMPUTE_CSUM)
- skb_postpull_rcsum(skb, ptr, len);
+ __skb_postpull_rcsum(skb, ptr, len, offset);
memcpy(ptr, from, len);
- if (ptr == sp->buff)
- /* skb_store_bits cannot return -EFAULT here */
- skb_store_bits(skb, offset, ptr, len);
-
if (flags & BPF_F_RECOMPUTE_CSUM)
- skb_postpush_rcsum(skb, ptr, len);
+ __skb_postpush_rcsum(skb, ptr, len, offset);
if (flags & BPF_F_INVALIDATE_HASH)
skb_clear_hash(skb);
@@ -1425,12 +1416,12 @@
static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
- int offset = (int) r2;
+ unsigned int offset = (unsigned int) r2;
void *to = (void *)(unsigned long) r3;
unsigned int len = (unsigned int) r4;
void *ptr;
- if (unlikely((u32) offset > 0xffff))
+ if (unlikely(offset > 0xffff))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -1458,20 +1449,17 @@
static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- int offset = (int) r2;
- __sum16 sum, *ptr;
+ unsigned int offset = (unsigned int) r2;
+ __sum16 *ptr;
if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
return -EINVAL;
- if (unlikely((u32) offset > 0xffff))
+ if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
- if (unlikely(!ptr))
- return -EFAULT;
-
+ ptr = (__sum16 *)(skb->data + offset);
switch (flags & BPF_F_HDR_FIELD_MASK) {
case 0:
if (unlikely(from != 0))
@@ -1489,10 +1477,6 @@
return -EINVAL;
}
- if (ptr == &sum)
- /* skb_store_bits guaranteed to not return -EFAULT here */
- skb_store_bits(skb, offset, ptr, sizeof(sum));
-
return 0;
}
@@ -1512,20 +1496,18 @@
struct sk_buff *skb = (struct sk_buff *) (long) r1;
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
- int offset = (int) r2;
- __sum16 sum, *ptr;
+ unsigned int offset = (unsigned int) r2;
+ __sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
BPF_F_HDR_FIELD_MASK)))
return -EINVAL;
- if (unlikely((u32) offset > 0xffff))
+ if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
- if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
return -EFAULT;
- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
- if (unlikely(!ptr))
- return -EFAULT;
+ ptr = (__sum16 *)(skb->data + offset);
if (is_mmzero && !*ptr)
return 0;
@@ -1548,10 +1530,6 @@
if (is_mmzero && !*ptr)
*ptr = CSUM_MANGLED_0;
- if (ptr == &sum)
- /* skb_store_bits guaranteed to not return -EFAULT here */
- skb_store_bits(skb, offset, ptr, sizeof(sum));
-
return 0;
}
@@ -1607,9 +1585,6 @@
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
- if (skb_at_tc_ingress(skb))
- skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
-
return dev_forward_skb(dev, skb);
}
@@ -1648,6 +1623,8 @@
if (unlikely(!skb))
return -ENOMEM;
+ bpf_push_mac_rcsum(skb);
+
return flags & BPF_F_INGRESS ?
__bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}
@@ -1693,6 +1670,8 @@
return -EINVAL;
}
+ bpf_push_mac_rcsum(skb);
+
return ri->flags & BPF_F_INGRESS ?
__bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}
@@ -1756,7 +1735,10 @@
vlan_proto != htons(ETH_P_8021AD)))
vlan_proto = htons(ETH_P_8021Q);
+ bpf_push_mac_rcsum(skb);
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+ bpf_pull_mac_rcsum(skb);
+
bpf_compute_data_end(skb);
return ret;
}
@@ -1776,7 +1758,10 @@
struct sk_buff *skb = (struct sk_buff *) (long) r1;
int ret;
+ bpf_push_mac_rcsum(skb);
ret = skb_vlan_pop(skb);
+ bpf_pull_mac_rcsum(skb);
+
bpf_compute_data_end(skb);
return ret;
}
@@ -2298,7 +2283,7 @@
}
#ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *)(long)r1;
struct bpf_map *map = (struct bpf_map *)(long)r2;
@@ -2321,8 +2306,8 @@
return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
}
-static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
- .func = bpf_skb_in_cgroup,
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+ .func = bpf_skb_under_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@@ -2402,8 +2387,8 @@
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
- case BPF_FUNC_skb_in_cgroup:
- return &bpf_skb_in_cgroup_proto;
+ case BPF_FUNC_skb_under_cgroup:
+ return &bpf_skb_under_cgroup_proto;
#endif
default:
return sk_filter_func_proto(func_id);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d07fc07..febca0f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2452,9 +2452,7 @@
static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
loff_t pos)
{
- struct fib_table *tb = iter->main_tb;
struct key_vector *l, **tp = &iter->tnode;
- struct trie *t;
t_key key;
/* use cache location of next-to-find key */
@@ -2462,8 +2460,6 @@
pos -= iter->pos;
key = iter->key;
} else {
- t = (struct trie *)tb->tb_data;
- iter->tnode = t->kv;
iter->pos = 0;
key = 0;
}
@@ -2504,12 +2500,12 @@
return NULL;
iter->main_tb = tb;
+ t = (struct trie *)tb->tb_data;
+ iter->tnode = t->kv;
if (*pos != 0)
return fib_route_get_idx(iter, *pos);
- t = (struct trie *)tb->tb_data;
- iter->tnode = t->kv;
iter->pos = 0;
iter->key = 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5b1481b..113cc43 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -370,7 +370,6 @@
tunnel->parms.o_flags, proto, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
- skb_set_inner_protocol(skb, proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index a917903..cc701fa 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -557,6 +557,33 @@
.get_link_net = ip_tunnel_get_link_net,
};
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+ return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ if (!is_vti_tunnel(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ if (!net_eq(tunnel->net, dev_net(dev)))
+ xfrm_garbage_collect(tunnel->net);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+ .notifier_call = vti_device_event,
+};
+
static int __init vti_init(void)
{
const char *msg;
@@ -564,6 +591,8 @@
pr_info("IPv4 over IPsec tunneling driver\n");
+ register_netdevice_notifier(&vti_notifier_block);
+
msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
@@ -596,6 +625,7 @@
xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
pernet_dev_failed:
+ unregister_netdevice_notifier(&vti_notifier_block);
pr_err("vti init: failed to register %s\n", msg);
return err;
}
@@ -607,6 +637,7 @@
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
+ unregister_netdevice_notifier(&vti_notifier_block);
}
module_init(vti_init);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab3e796..df8425f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3543,7 +3543,7 @@
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
- keep_addr = !(how || _keep_addr <= 0);
+ keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3599,7 +3599,7 @@
/* re-combine the user config with event to determine if permanent
* addresses are to be removed from the interface list
*/
- keep_addr = (!how && _keep_addr > 0);
+ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index c53b92c..37ac9de 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -952,8 +952,10 @@
memcpy(new, hop, start);
ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
secattr);
- if (ret_val < 0)
+ if (ret_val < 0) {
+ kfree(new);
return ERR_PTR(ret_val);
+ }
buf_len = start + ret_val;
/* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 776d145..704274c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -519,8 +519,6 @@
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
- skb_set_inner_protocol(skb, protocol);
-
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fed40d1..0900352 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -55,7 +55,7 @@
struct icmp6hdr user_icmph;
int addr_type;
struct in6_addr *daddr;
- int iif = 0;
+ int oif = 0;
struct flowi6 fl6;
int err;
struct dst_entry *dst;
@@ -78,25 +78,30 @@
if (u->sin6_family != AF_INET6) {
return -EAFNOSUPPORT;
}
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != u->sin6_scope_id) {
- return -EINVAL;
- }
daddr = &(u->sin6_addr);
- iif = u->sin6_scope_id;
+ if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+ oif = u->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
}
- if (!iif)
- iif = sk->sk_bound_dev_if;
+ if (!oif)
+ oif = sk->sk_bound_dev_if;
+
+ if (!oif)
+ oif = np->sticky_pktinfo.ipi6_ifindex;
+
+ if (!oif && ipv6_addr_is_multicast(daddr))
+ oif = np->mcast_oif;
+ else if (!oif)
+ oif = np->ucast_oif;
addr_type = ipv6_addr_type(daddr);
- if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
- return -EINVAL;
- if (addr_type & IPV6_ADDR_MAPPED)
+ if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+ (addr_type & IPV6_ADDR_MAPPED) ||
+ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
return -EINVAL;
/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -106,16 +111,12 @@
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_oif = oif;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
- else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
-
ipc6.tclass = np->tclass;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 4a7ae32a..1138eaf 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -185,8 +185,12 @@
self->magic = IAS_MAGIC;
self->mode = mode;
- if (mode == IAS_CLIENT)
- iriap_register_lsap(self, slsap_sel, mode);
+ if (mode == IAS_CLIENT) {
+ if (iriap_register_lsap(self, slsap_sel, mode)) {
+ kfree(self);
+ return NULL;
+ }
+ }
self->confirm = callback;
self->priv = priv;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 47e99ab8..543b1d4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -869,7 +869,7 @@
/* free all potentially still buffered bcast frames */
local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
- skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+ ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
mutex_lock(&local->mtx);
ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 184473c..ba5fc1f 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1094,7 +1094,7 @@
trace_drv_get_expected_throughput(sta);
if (local->ops->get_expected_throughput)
- ret = local->ops->get_expected_throughput(sta);
+ ret = local->ops->get_expected_throughput(&local->hw, sta);
trace_drv_return_u32(local, ret);
return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c66411d..42120d9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -881,20 +881,22 @@
netif_carrier_off(sdata->dev);
+ /* flush STAs and mpaths on this iface */
+ sta_info_flush(sdata);
+ mesh_path_flush_by_iface(sdata);
+
/* stop the beacon */
ifmsh->mesh_id_len = 0;
sdata->vif.bss_conf.enable_beacon = false;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+
+ /* remove beacon */
bcn = rcu_dereference_protected(ifmsh->beacon,
lockdep_is_held(&sdata->wdev.mtx));
RCU_INIT_POINTER(ifmsh->beacon, NULL);
kfree_rcu(bcn, rcu_head);
- /* flush STAs and mpaths on this iface */
- sta_info_flush(sdata);
- mesh_path_flush_by_iface(sdata);
-
/* free all potentially still buffered group-addressed frames */
local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
skb_queue_purge(&ifmsh->ps.bc_buf);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2e8a902..9dce3b1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1268,7 +1268,7 @@
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
- if (!txqi->tin.backlog_packets)
+ if (txqi->tin.backlog_packets)
set_bit(tid, &sta->txq_buffered_tids);
else
clear_bit(tid, &sta->txq_buffered_tids);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index c6d5c72..a2a6826 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -771,6 +771,13 @@
clear_sta_flag(sta, WLAN_STA_SP);
acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+ /* mesh Peer Service Period support */
+ if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+ ieee80211_is_data_qos(fc))
+ ieee80211_mpsp_trigger_process(
+ ieee80211_get_qos_ctl(hdr), sta, true, acked);
+
if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
/*
* The STA is in power save mode, so assume
@@ -781,13 +788,6 @@
return;
}
- /* mesh Peer Service Period support */
- if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
- ieee80211_is_data_qos(fc))
- ieee80211_mpsp_trigger_process(
- ieee80211_get_qos_ctl(hdr),
- sta, true, acked);
-
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
(ieee80211_is_data(hdr->frame_control)) &&
(rates_idx != -1))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 91461c4..5023966 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -368,7 +368,7 @@
skb = skb_dequeue(&ps->bc_buf);
if (skb) {
purged++;
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
}
total += skb_queue_len(&ps->bc_buf);
}
@@ -451,7 +451,7 @@
if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
ps_dbg(tx->sdata,
"BC TX buffer full - dropping the oldest frame\n");
- dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+ ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
} else
tx->local->total_ps_buffered++;
@@ -4275,7 +4275,7 @@
sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
break;
- dev_kfree_skb_any(skb);
+ ieee80211_free_txskb(hw, skb);
}
info = IEEE80211_SKB_CB(skb);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 9e36931..f8dbacf 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -574,7 +574,7 @@
helper = rcu_dereference(nfct_help(expect->master)->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
- if (helper->expect_policy[expect->class].name)
+ if (helper->expect_policy[expect->class].name[0])
seq_printf(s, "/%s",
helper->expect_policy[expect->class].name);
}
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bb77a97..5c0db5c 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1473,7 +1473,8 @@
"timeout to %u seconds for",
info->timeout);
nf_ct_dump_tuple(&exp->tuple);
- mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
+ mod_timer_pending(&exp->timeout,
+ jiffies + info->timeout * HZ);
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 050bb34..fdfc71f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1894,6 +1894,8 @@
if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
return -EINVAL;
+ if (otuple.dst.protonum != rtuple.dst.protonum)
+ return -EINVAL;
ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
&rtuple, u3);
@@ -2362,12 +2364,8 @@
return PTR_ERR(exp);
err = nf_ct_expect_related_report(exp, portid, report);
- if (err < 0) {
- nf_ct_expect_put(exp);
- return err;
- }
-
- return 0;
+ nf_ct_expect_put(exp);
+ return err;
}
static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 8d9db9d..7d77217 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1383,7 +1383,7 @@
return NF_DROP;
}
cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
- if (!cseq) {
+ if (!cseq && *(*dptr + matchoff) != '0') {
nf_ct_helper_log(skb, ct, "cannot get cseq");
return NF_DROP;
}
@@ -1446,7 +1446,7 @@
return NF_DROP;
}
cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
- if (!cseq) {
+ if (!cseq && *(*dptr + matchoff) != '0') {
nf_ct_helper_log(skb, ct, "cannot get cseq");
return NF_DROP;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5d36a09..f49f450 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1145,10 +1145,8 @@
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int err;
- queue = instance_lookup(q, queue_num);
- if (!queue)
- queue = verdict_instance_lookup(q, queue_num,
- NETLINK_CB(skb).portid);
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index ba7aed1..82c264e 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,6 +59,7 @@
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -66,9 +67,15 @@
tb[NFTA_EXTHDR_LEN] == NULL)
return -EINVAL;
+ offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+ len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+ if (offset > U8_MAX || len > U8_MAX)
+ return -ERANGE;
+
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
- priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
- priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+ priv->offset = offset;
+ priv->len = len;
priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
return nft_validate_register_store(ctx, priv->dreg, NULL,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 6473936..ffe9ae0 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -70,7 +70,6 @@
} else if (d > 0)
parent = parent->rb_right;
else {
-found:
if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
continue;
@@ -84,9 +83,12 @@
}
}
- if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
- rbe = interval;
- goto found;
+ if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+ nft_set_elem_active(&interval->ext, genmask) &&
+ !nft_rbtree_interval_end(interval)) {
+ spin_unlock_bh(&nft_rbtree_lock);
+ *ext = &interval->ext;
+ return true;
}
out:
spin_unlock_bh(&nft_rbtree_lock);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c644c78..e054a74 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -433,7 +433,6 @@
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
- enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
unsigned int dataoff;
u8 protonum;
@@ -458,13 +457,8 @@
ct = nf_ct_tuplehash_to_ctrack(h);
- ctinfo = ovs_ct_get_info(h);
- if (ctinfo == IP_CT_NEW) {
- /* This should not happen. */
- WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
- }
skb->nfct = &ct->ct_general;
- skb->nfctinfo = ctinfo;
+ skb->nfctinfo = ovs_ct_get_info(h);
return ct;
}
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 1a1fcec..5aaf3ba 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -93,7 +93,14 @@
return ERR_CAST(dev);
}
- dev_change_flags(dev, dev->flags | IFF_UP);
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ rtnl_delete_link(dev);
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ goto error;
+ }
+
rtnl_unlock();
return vport;
error:
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 7f8897f..0e72d95 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -54,6 +54,7 @@
struct net *net = ovs_dp_get_net(parms->dp);
struct net_device *dev;
struct vport *vport;
+ int err;
vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
if (IS_ERR(vport))
@@ -67,9 +68,15 @@
return ERR_CAST(dev);
}
- dev_change_flags(dev, dev->flags | IFF_UP);
- rtnl_unlock();
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ rtnl_delete_link(dev);
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_PTR(err);
+ }
+ rtnl_unlock();
return vport;
}
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 434e04c..95c3614 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -140,7 +140,7 @@
static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
{
- dev->needed_headroom = new_hr;
+ dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
}
static const struct net_device_ops internal_dev_netdev_ops = {
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 5eb7694..7eb955e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -130,7 +130,14 @@
return ERR_CAST(dev);
}
- dev_change_flags(dev, dev->flags | IFF_UP);
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ rtnl_delete_link(dev);
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ goto error;
+ }
+
rtnl_unlock();
return vport;
error:
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1bb9e7a..ff83fb1 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -425,6 +425,7 @@
spinlock_t lock;
rwlock_t state_lock; /* lock for state transition */
atomic_t usage;
+ atomic_t skb_count; /* Outstanding packets on this call */
atomic_t sequence; /* Tx data packet sequence counter */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0b28321..9bae21e 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -130,6 +130,7 @@
call->state = RXRPC_CALL_SERVER_ACCEPTING;
list_add_tail(&call->accept_link, &rx->acceptq);
rxrpc_get_call(call);
+ atomic_inc(&call->skb_count);
nsp = rxrpc_skb(notification);
nsp->call = call;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index fc32aa5..e60cf65 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -460,6 +460,7 @@
ASSERTCMP(sp->call, ==, NULL);
sp->call = call;
rxrpc_get_call(call);
+ atomic_inc(&call->skb_count);
/* insert into the buffer in sequence order */
spin_lock_bh(&call->lock);
@@ -734,6 +735,7 @@
skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
sp->call = call;
rxrpc_get_call(call);
+ atomic_inc(&call->skb_count);
spin_lock_bh(&call->lock);
if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
BUG();
@@ -793,6 +795,7 @@
sp->error = error;
sp->call = call;
rxrpc_get_call(call);
+ atomic_inc(&call->skb_count);
spin_lock_bh(&call->lock);
ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
@@ -834,6 +837,9 @@
return;
}
+ if (!call->conn)
+ goto skip_msg_init;
+
/* there's a good chance we're going to have to send a message, so set
* one up in advance */
msg.msg_name = &call->conn->params.peer->srx.transport;
@@ -856,6 +862,7 @@
memset(iov, 0, sizeof(iov));
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
+skip_msg_init:
/* deal with events of a final nature */
if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 91287c9..ae057e0 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -275,6 +275,7 @@
list_del_init(&call->link);
write_unlock_bh(&rxrpc_call_lock);
+ set_bit(RXRPC_CALL_RELEASED, &call->flags);
call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = %d", ret);
@@ -287,6 +288,7 @@
*/
found_user_ID_now_present:
write_unlock(&rx->call_lock);
+ set_bit(RXRPC_CALL_RELEASED, &call->flags);
call->state = RXRPC_CALL_DEAD;
rxrpc_put_call(call);
_leave(" = -EEXIST [%p]", call);
@@ -491,15 +493,9 @@
spin_lock_bh(&call->lock);
while ((skb = skb_dequeue(&call->rx_queue)) ||
(skb = skb_dequeue(&call->rx_oos_queue))) {
- sp = rxrpc_skb(skb);
- if (sp->call) {
- ASSERTCMP(sp->call, ==, call);
- rxrpc_put_call(call);
- sp->call = NULL;
- }
- skb->destructor = NULL;
spin_unlock_bh(&call->lock);
+ sp = rxrpc_skb(skb);
_debug("- zap %s %%%u #%u",
rxrpc_pkts[sp->hdr.type],
sp->hdr.serial, sp->hdr.seq);
@@ -605,6 +601,7 @@
if (atomic_dec_and_test(&call->usage)) {
_debug("call %d dead", call->debug_id);
+ WARN_ON(atomic_read(&call->skb_count) != 0);
ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
rxrpc_queue_work(&call->destroyer);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 991a20d..70bb778 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -55,9 +55,6 @@
if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
_debug("already terminated");
ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
- skb->destructor = NULL;
- sp->call = NULL;
- rxrpc_put_call(call);
rxrpc_free_skb(skb);
return 0;
}
@@ -111,13 +108,7 @@
ret = 0;
out:
- /* release the socket buffer */
- if (skb) {
- skb->destructor = NULL;
- sp->call = NULL;
- rxrpc_put_call(call);
- rxrpc_free_skb(skb);
- }
+ rxrpc_free_skb(skb);
_leave(" = %d", ret);
return ret;
@@ -133,11 +124,15 @@
struct rxrpc_skb_priv *sp;
bool terminal;
int ret, ackbit, ack;
+ u32 serial;
+ u8 flags;
_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
sp = rxrpc_skb(skb);
ASSERTCMP(sp->call, ==, NULL);
+ flags = sp->hdr.flags;
+ serial = sp->hdr.serial;
spin_lock(&call->lock);
@@ -200,8 +195,9 @@
sp->call = call;
rxrpc_get_call(call);
- terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
- !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+ atomic_inc(&call->skb_count);
+ terminal = ((flags & RXRPC_LAST_PACKET) &&
+ !(flags & RXRPC_CLIENT_INITIATED));
ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOBUFS) {
@@ -213,12 +209,13 @@
}
skb = NULL;
+ sp = NULL;
_debug("post #%u", seq);
ASSERTCMP(call->rx_data_post, ==, seq);
call->rx_data_post++;
- if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ if (flags & RXRPC_LAST_PACKET)
set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
/* if we've reached an out of sequence packet then we need to drain
@@ -234,7 +231,7 @@
spin_unlock(&call->lock);
atomic_inc(&call->ackr_not_idle);
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
_leave(" = 0 [posted]");
return 0;
@@ -247,7 +244,7 @@
discard_and_ack:
_debug("discard and ACK packet %p", skb);
- __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+ __rxrpc_propose_ACK(call, ack, serial, true);
discard:
spin_unlock(&call->lock);
rxrpc_free_skb(skb);
@@ -255,7 +252,7 @@
return 0;
enqueue_and_ack:
- __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+ __rxrpc_propose_ACK(call, ack, serial, true);
enqueue_packet:
_net("defer skb %p", skb);
spin_unlock(&call->lock);
@@ -575,13 +572,13 @@
* post connection-level events to the connection
* - this includes challenges, responses and some aborts
*/
-static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
_enter("%p,%p", conn, skb);
skb_queue_tail(&conn->rx_queue, skb);
- return rxrpc_queue_conn(conn);
+ rxrpc_queue_conn(conn);
}
/*
@@ -702,7 +699,6 @@
rcu_read_lock();
-retry_find_conn:
conn = rxrpc_find_connection_rcu(local, skb);
if (!conn)
goto cant_route_call;
@@ -710,8 +706,7 @@
if (sp->hdr.callNumber == 0) {
/* Connection-level packet */
_debug("CONN %p {%d}", conn, conn->debug_id);
- if (!rxrpc_post_packet_to_conn(conn, skb))
- goto retry_find_conn;
+ rxrpc_post_packet_to_conn(conn, skb);
} else {
/* Call-bound packets are routed by connection channel. */
unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
@@ -749,6 +744,8 @@
if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
_debug("reject type %d",sp->hdr.type);
rxrpc_reject_packet(local, skb);
+ } else {
+ rxrpc_free_skb(skb);
}
_leave(" [no call]");
return;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a3fa2ed..9ed66d5 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -203,6 +203,9 @@
}
/* we transferred the whole data packet */
+ if (!(flags & MSG_PEEK))
+ rxrpc_kernel_data_consumed(call, skb);
+
if (sp->hdr.flags & RXRPC_LAST_PACKET) {
_debug("last");
if (rxrpc_conn_is_client(call->conn)) {
@@ -360,28 +363,6 @@
}
/**
- * rxrpc_kernel_data_delivered - Record delivery of data message
- * @skb: Message holding data
- *
- * Record the delivery of a data message. This permits RxRPC to keep its
- * tracking correct. The socket buffer will be deleted.
- */
-void rxrpc_kernel_data_delivered(struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_call *call = sp->call;
-
- ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
- ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
- call->rx_data_recv = sp->hdr.seq;
-
- ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
- rxrpc_free_skb(skb);
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
-
-/**
* rxrpc_kernel_is_data_last - Determine if data message is last one
* @skb: Message holding data
*
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index eee0cfd9..06c51d4 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -98,11 +98,39 @@
spin_unlock_bh(&call->lock);
}
+/**
+ * rxrpc_kernel_data_consumed - Record consumption of data message
+ * @call: The call to which the message pertains.
+ * @skb: Message holding data
+ *
+ * Record the consumption of a data message and generate an ACK if appropriate.
+ * The call state is shifted if this was the final packet. The caller must be
+ * in process context with no spinlocks held.
+ *
+ * TODO: Actually generate the ACK here rather than punting this to the
+ * workqueue.
+ */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
+
+ ASSERTCMP(sp->call, ==, call);
+ ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
+
+ /* TODO: Fix the sequence number tracking */
+ ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
+ ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
+ ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+
+ call->rx_data_recv = sp->hdr.seq;
+ rxrpc_hard_ACK_data(call, sp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
+
/*
- * destroy a packet that has an RxRPC control buffer
- * - advance the hard-ACK state of the parent call (done here in case something
- * in the kernel bypasses recvmsg() and steals the packet directly off of the
- * socket receive queue)
+ * Destroy a packet that has an RxRPC control buffer
*/
void rxrpc_packet_destructor(struct sk_buff *skb)
{
@@ -112,9 +140,8 @@
_enter("%p{%p}", skb, call);
if (call) {
- /* send the final ACK on a client call */
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
- rxrpc_hard_ACK_data(call, sp);
+ if (atomic_dec_return(&call->skb_count) < 0)
+ BUG();
rxrpc_put_call(call);
sp->call = NULL;
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e4a5f26..d09d068 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -64,7 +64,6 @@
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
p->ops->cleanup(p, bind);
- list_del(&p->list);
tcf_hash_destroy(p->hinfo, p);
ret = ACT_P_DELETED;
}
@@ -421,18 +420,19 @@
return res;
}
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
- struct tcf_result *res)
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+ int nr_actions, struct tcf_result *res)
{
- const struct tc_action *a;
- int ret = -1;
+ int ret = -1, i;
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
ret = TC_ACT_OK;
goto exec_done;
}
- list_for_each_entry(a, actions, list) {
+ for (i = 0; i < nr_actions; i++) {
+ const struct tc_action *a = actions[i];
+
repeat:
ret = a->ops->act(skb, a, res);
if (ret == TC_ACT_REPEAT)
@@ -754,16 +754,6 @@
return ERR_PTR(err);
}
-static void cleanup_a(struct list_head *actions)
-{
- struct tc_action *a, *tmp;
-
- list_for_each_entry_safe(a, tmp, actions, list) {
- list_del(&a->list);
- kfree(a);
- }
-}
-
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
@@ -905,7 +895,7 @@
return ret;
}
err:
- cleanup_a(&actions);
+ tcf_action_destroy(&actions, 0);
return ret;
}
@@ -942,15 +932,9 @@
ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
if (ret)
- goto done;
+ return ret;
- /* dump then free all the actions after update; inserted policy
- * stays intact
- */
- ret = tcf_add_notify(net, n, &actions, portid);
- cleanup_a(&actions);
-done:
- return ret;
+ return tcf_add_notify(net, n, &actions, portid);
}
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b3c7e97..8a3be1d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -63,49 +63,8 @@
const struct tc_action_ops *ops)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- struct tcf_hashinfo *hinfo = tn->hinfo;
- int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
- struct nlattr *nest;
- spin_lock_bh(&hinfo->lock);
-
- s_i = cb->args[0];
-
- for (i = 0; i < (POL_TAB_MASK + 1); i++) {
- struct hlist_head *head;
- struct tc_action *p;
-
- head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
-
- hlist_for_each_entry_rcu(p, head, tcfa_head) {
- index++;
- if (index < s_i)
- continue;
- nest = nla_nest_start(skb, index);
- if (nest == NULL)
- goto nla_put_failure;
- if (type == RTM_DELACTION)
- err = tcf_action_dump_1(skb, p, 0, 1);
- else
- err = tcf_action_dump_1(skb, p, 0, 0);
- if (err < 0) {
- index--;
- nla_nest_cancel(skb, nest);
- goto done;
- }
- nla_nest_end(skb, nest);
- n_i++;
- }
- }
-done:
- spin_unlock_bh(&hinfo->lock);
- if (n_i)
- cb->args[0] += n_i;
- return n_i;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- goto done;
+ return tcf_generic_walker(tn, skb, cb, type, ops);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -125,6 +84,7 @@
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
struct tc_action_net *tn = net_generic(net, police_net_id);
+ bool exists = false;
int size;
if (nla == NULL)
@@ -139,24 +99,24 @@
size = nla_len(tb[TCA_POLICE_TBF]);
if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
return -EINVAL;
- parm = nla_data(tb[TCA_POLICE_TBF]);
- if (parm->index) {
- if (tcf_hash_check(tn, parm->index, a, bind)) {
- if (ovr)
- goto override;
- /* not replacing */
- return -EEXIST;
- }
- } else {
+ parm = nla_data(tb[TCA_POLICE_TBF]);
+ exists = tcf_hash_check(tn, parm->index, a, bind);
+ if (exists && bind)
+ return 0;
+
+ if (!exists) {
ret = tcf_hash_create(tn, parm->index, NULL, a,
&act_police_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
+ } else {
+ tcf_hash_release(*a, bind);
+ if (!ovr)
+ return -EEXIST;
}
-override:
police = to_police(*a);
if (parm->rate.rate) {
err = -ENOMEM;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 843a716..a7c5645 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -541,8 +541,12 @@
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
- INIT_LIST_HEAD(&exts->actions);
+ LIST_HEAD(actions);
+
+ tcf_exts_to_list(exts, &actions);
+ tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+ kfree(exts->actions);
+ exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
@@ -554,7 +558,6 @@
{
struct tc_action *act;
- INIT_LIST_HEAD(&exts->actions);
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
"police", ovr,
@@ -563,14 +566,20 @@
return PTR_ERR(act);
act->type = exts->type = TCA_OLD_COMPAT;
- list_add(&act->list, &exts->actions);
+ exts->actions[0] = act;
+ exts->nr_actions = 1;
} else if (exts->action && tb[exts->action]) {
- int err;
+ LIST_HEAD(actions);
+ int err, i = 0;
+
err = tcf_action_init(net, tb[exts->action], rate_tlv,
NULL, ovr,
- TCA_ACT_BIND, &exts->actions);
+ TCA_ACT_BIND, &actions);
if (err)
return err;
+ list_for_each_entry(act, &actions, list)
+ exts->actions[i++] = act;
+ exts->nr_actions = i;
}
}
#else
@@ -587,37 +596,49 @@
struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
- LIST_HEAD(tmp);
+ struct tcf_exts old = *dst;
+
tcf_tree_lock(tp);
- list_splice_init(&dst->actions, &tmp);
- list_splice(&src->actions, &dst->actions);
+ dst->nr_actions = src->nr_actions;
+ dst->actions = src->actions;
dst->type = src->type;
tcf_tree_unlock(tp);
- tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
+
+ tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
-#define tcf_exts_first_act(ext) \
- list_first_entry_or_null(&(exts)->actions, \
- struct tc_action, list)
+#ifdef CONFIG_NET_CLS_ACT
+static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
+{
+ if (exts->nr_actions == 0)
+ return NULL;
+ else
+ return exts->actions[0];
+}
+#endif
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
struct nlattr *nest;
- if (exts->action && !list_empty(&exts->actions)) {
+ if (exts->action && exts->nr_actions) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
* tc data even if iproute2 was newer - jhs
*/
if (exts->type != TCA_OLD_COMPAT) {
+ LIST_HEAD(actions);
+
nest = nla_nest_start(skb, exts->action);
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
+
+ tcf_exts_to_list(exts, &actions);
+ if (tcf_action_dump(skb, &actions, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (exts->police) {
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4cb5aed..ef8ba77 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -293,6 +293,7 @@
return ERR_PTR(err);
}
+ iter->start_fail = 0;
return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f69edcf..bb69153 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -13,6 +13,7 @@
{
union sctp_addr laddr, paddr;
struct dst_entry *dst;
+ struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
laddr = list_entry(asoc->base.bind_addr.address_list.next,
struct sctp_sockaddr_entry, list)->a;
@@ -40,10 +41,15 @@
}
r->idiag_state = asoc->state;
- r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
- r->idiag_retrans = asoc->rtx_data_chunks;
- r->idiag_expires = jiffies_to_msecs(
- asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies);
+ if (timer_pending(t3_rtx)) {
+ r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+ r->idiag_retrans = asoc->rtx_data_chunks;
+ r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+ } else {
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
+ }
}
static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
@@ -350,7 +356,7 @@
if (cb->args[4] < cb->args[1])
goto next;
- if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs))
+ if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
goto next;
if (r->sdiag_family != AF_UNSPEC &&
@@ -465,7 +471,7 @@
* 3 : to mark if we have dumped the ep info of the current asoc
* 4 : to work as a temporary variable to traversal list
*/
- if (!(idiag_states & ~TCPF_LISTEN))
+ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
done:
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 1bc4f71..d85b803 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,14 +702,14 @@
*/
sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff));
- sctp_ulpevent_receive_data(event, asoc);
-
/* And hold the chunk as we need it for getting the IP headers
* later in recvmsg
*/
sctp_chunk_hold(chunk);
event->chunk = chunk;
+ sctp_ulpevent_receive_data(event, asoc);
+
event->stream = ntohs(chunk->subh.data_hdr->stream);
event->ssn = ntohs(chunk->subh.data_hdr->ssn);
event->ppid = chunk->subh.data_hdr->ppid;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 23c8e7c..976c781 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -340,12 +340,14 @@
}
static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
{
struct gss_upcall_msg *pos;
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
+ if (auth && pos->auth->service != auth->service)
+ continue;
atomic_inc(&pos->count);
dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
@@ -365,7 +367,7 @@
struct gss_upcall_msg *old;
spin_lock(&pipe->lock);
- old = __gss_find_upcall(pipe, gss_msg->uid);
+ old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
if (old == NULL) {
atomic_inc(&gss_msg->count);
list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid);
+ gss_msg = __gss_find_upcall(pipe, uid, NULL);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cb49898..7f79fb7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2638,6 +2638,7 @@
{
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
+ unsigned long reconnect_timeout;
unsigned char resvport;
int ret = 0;
@@ -2649,6 +2650,7 @@
return -EAGAIN;
}
resvport = xprt->resvport;
+ reconnect_timeout = xprt->max_reconnect_timeout;
rcu_read_unlock();
xprt = xprt_create_transport(xprtargs);
@@ -2657,6 +2659,7 @@
goto out_put_switch;
}
xprt->resvport = resvport;
+ xprt->max_reconnect_timeout = reconnect_timeout;
rpc_xprt_switch_set_roundrobin(xps);
if (setup) {
@@ -2673,6 +2676,27 @@
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ unsigned long timeout = *((unsigned long *)data);
+
+ if (timeout < xprt->max_reconnect_timeout)
+ xprt->max_reconnect_timeout = timeout;
+ return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{
+ rpc_clnt_iterate_for_each_xprt(clnt,
+ rpc_xprt_cap_max_reconnect_timeout,
+ &timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8313960..ea244b2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,6 +680,20 @@
spin_unlock_bh(&xprt->transport_lock);
}
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{
+ return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+ __must_hold(&xprt->transport_lock)
+{
+ if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+ mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
static void
xprt_init_autodisconnect(unsigned long data)
{
@@ -688,6 +702,8 @@
spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv))
goto out_abort;
+ /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+ xprt->last_used = jiffies;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
spin_unlock(&xprt->transport_lock);
@@ -725,6 +741,7 @@
goto out;
xprt->snd_task =NULL;
xprt->ops->release_xprt(xprt, NULL);
+ xprt_schedule_autodisconnect(xprt);
out:
spin_unlock_bh(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
@@ -888,11 +905,6 @@
spin_unlock_bh(&xprt->transport_lock);
}
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
- return xprt->idle_timeout != 0;
-}
-
/**
* xprt_prepare_transmit - reserve the transport before sending a request
* @task: RPC task about to send a request
@@ -1280,9 +1292,7 @@
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
xprt->last_used = jiffies;
- if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
- mod_timer(&xprt->timer,
- xprt->last_used + xprt->idle_timeout);
+ xprt_schedule_autodisconnect(xprt);
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(req->rq_buffer);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 111767a..8ede3bc 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -177,7 +177,6 @@
* increase over time if the server is down or not responding.
*/
#define XS_TCP_INIT_REEST_TO (3U * HZ)
-#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ)
/*
* TCP idle timeout; client drops the transport socket if it is idle
@@ -2173,6 +2172,8 @@
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
+
+ xprt->stat.connect_start = jiffies;
}
static void xs_udp_setup_socket(struct work_struct *work)
@@ -2236,6 +2237,7 @@
unsigned int keepcnt = xprt->timeout->to_retries + 1;
unsigned int opt_on = 1;
unsigned int timeo;
+ unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2247,6 +2249,16 @@
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
+ /* Avoid temporary addresses; they are bad for long-lived
+ * connections such as NFS mounts.
+ * RFC4941, section 3.6 suggests that:
+ * Individual applications, which have specific
+ * knowledge about the normal duration of connections,
+ * MAY override this as appropriate.
+ */
+ kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+ (char *)&addr_pref, sizeof(addr_pref));
+
/* TCP user timeout (see RFC5482) */
timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
(xprt->timeout->to_retries + 1);
@@ -2295,6 +2307,10 @@
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ break;
+ case -EADDRNOTAVAIL:
+ /* Source port number is unavailable. Try a new one! */
+ transport->srcport = 0;
}
out:
return ret;
@@ -2369,6 +2385,25 @@
xprt_wake_pending_tasks(xprt, status);
}
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+ unsigned long start, now = jiffies;
+
+ start = xprt->stat.connect_start + xprt->reestablish_timeout;
+ if (time_after(start, now))
+ return start - now;
+ return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+ xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
@@ -2386,6 +2421,7 @@
static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ unsigned long delay = 0;
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
@@ -2397,19 +2433,15 @@
/* Start by resetting any existing state */
xs_reset_transport(transport);
- queue_delayed_work(xprtiod_workqueue,
- &transport->connect_worker,
- xprt->reestablish_timeout);
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
- if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
- } else {
+ delay = xs_reconnect_delay(xprt);
+ xs_reconnect_backoff(xprt);
+
+ } else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
- queue_delayed_work(xprtiod_workqueue,
- &transport->connect_worker, 0);
- }
+
+ queue_delayed_work(xprtiod_workqueue,
+ &transport->connect_worker,
+ delay);
}
/**
@@ -2961,6 +2993,8 @@
xprt->ops = &xs_tcp_ops;
xprt->timeout = &xs_tcp_default_timeout;
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index b62caa1..ed97a58 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -728,12 +728,13 @@
u32 bearer_id, u32 *prev_node)
{
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
- struct tipc_peer *peer = mon->self;
+ struct tipc_peer *peer;
if (!mon)
return -EINVAL;
read_lock_bh(&mon->lock);
+ peer = mon->self;
do {
if (*prev_node) {
if (peer->addr == *prev_node)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c49b8df..f9f5f3c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2180,7 +2180,8 @@
TIPC_CONN_MSG, SHORT_H_SIZE,
0, dnode, onode, dport, oport,
TIPC_CONN_SHUTDOWN);
- tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+ if (skb)
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
}
tsk->connected = 0;
sock->state = SS_DISCONNECTING;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 699dfab..936d7ee 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -87,9 +87,6 @@
vq = vsock->vqs[VSOCK_VQ_TX];
- /* Avoid unnecessary interrupts while we're processing the ring */
- virtqueue_disable_cb(vq);
-
for (;;) {
struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
@@ -99,7 +96,6 @@
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
spin_unlock_bh(&vsock->send_pkt_list_lock);
- virtqueue_enable_cb(vq);
break;
}
@@ -118,13 +114,13 @@
}
ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+ /* Usually this means that there is no more space available in
+ * the vq
+ */
if (ret < 0) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
-
- if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
- continue; /* retry now that we have more space */
break;
}
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b0e11b6..0f50622 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -513,6 +513,7 @@
r = cfg80211_get_chans_dfs_available(wiphy,
chandef->center_freq2,
width);
+ break;
default:
WARN_ON(chandef->center_freq2);
break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 46417f9..f02653a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5380,6 +5380,7 @@
{
struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
u32 mask = 0;
+ u16 ht_opmode;
#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
do { \
@@ -5471,9 +5472,36 @@
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
mask, NL80211_MESHCONF_RSSI_THRESHOLD,
nl80211_check_s32);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
- mask, NL80211_MESHCONF_HT_OPMODE,
- nl80211_check_u16);
+ /*
+ * Check HT operation mode based on
+ * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+ */
+ if (tb[NL80211_MESHCONF_HT_OPMODE]) {
+ ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
+
+ if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION |
+ IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+ IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+ return -EINVAL;
+
+ if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
+ (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+ return -EINVAL;
+
+ switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
+ case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
+ case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
+ if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
+ return -EINVAL;
+ break;
+ case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
+ case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
+ if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+ return -EINVAL;
+ break;
+ }
+ cfg->ht_opmode = ht_opmode;
+ }
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
1, 65535, mask,
NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 217c8d507..7927a09 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -72,8 +72,8 @@
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
- (void *) BPF_FUNC_skb_in_cgroup;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_under_cgroup;
#if defined(__x86_64__)
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 2732c37..10ff734 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -57,7 +57,7 @@
bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
eth->h_proto, ip6h->nexthdr);
return TC_ACT_OK;
- } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+ } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
bpf_trace_printk(pass_msg, sizeof(pass_msg));
return TC_ACT_OK;
} else {
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index 47bf085..cce2b59 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -68,7 +68,16 @@
assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
errno == E2BIG);
+ /* update existing element, though the map is full */
+ key = 1;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+ key = 2;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+ key = 1;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
/* check that key = 0 doesn't exist */
+ key = 0;
assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
/* iterate over two elements */
@@ -413,10 +422,12 @@
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
- if (do_update)
+ if (do_update) {
assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
- else
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+ } else {
assert(bpf_delete_elem(map_fd, &key) == 0);
+ }
}
}
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 15b196f..1792198 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -108,16 +108,20 @@
as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
+# Do not attempt to build with gcc plugins during cc-option tests.
+# (And this uses delayed resolution so the flags will be up to date.)
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
+
# cc-option
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
cc-option = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+ $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+ $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
# cc-option-align
# Prefix align with either -falign or -malign
@@ -127,7 +131,7 @@
# cc-disable-warning
# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
cc-disable-warning = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+ $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# cc-name
# Expands to either gcc or clang
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 5e22b60..61f0e6d 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -19,25 +19,42 @@
endif
endif
- GCC_PLUGINS_CFLAGS := $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y))
+ GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
- export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN SANCOV_PLUGIN
+ export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR SANCOV_PLUGIN
- ifeq ($(PLUGINCC),)
- ifneq ($(GCC_PLUGINS_CFLAGS),)
- ifeq ($(call cc-ifversion, -ge, 0405, y), y)
- PLUGINCC := $(shell $(CONFIG_SHELL) -x $(srctree)/scripts/gcc-plugin.sh "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)")
- $(warning warning: your gcc installation does not support plugins, perhaps the necessary headers are missing?)
- else
- $(warning warning: your gcc version does not support plugins, you should upgrade it to gcc 4.5 at least)
- endif
- endif
- else
+ ifneq ($(PLUGINCC),)
# SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
GCC_PLUGINS_CFLAGS := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGINS_CFLAGS))
endif
KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS)
GCC_PLUGIN := $(gcc-plugin-y)
-
+ GCC_PLUGIN_SUBDIR := $(gcc-plugin-subdir-y)
endif
+
+# If plugins aren't supported, abort the build before hard-to-read compiler
+# errors start getting spewed by the main build.
+PHONY += gcc-plugins-check
+gcc-plugins-check: FORCE
+ifdef CONFIG_GCC_PLUGINS
+ ifeq ($(PLUGINCC),)
+ ifneq ($(GCC_PLUGINS_CFLAGS),)
+ ifeq ($(call cc-ifversion, -ge, 0405, y), y)
+ $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true
+ @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1
+ else
+ @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc version does not support plugins, you should upgrade it to at least gcc 4.5" >&2 && exit 1
+ endif
+ endif
+ endif
+endif
+ @:
+
+# Actually do the build, if requested.
+PHONY += gcc-plugins
+gcc-plugins: scripts_basic gcc-plugins-check
+ifdef CONFIG_GCC_PLUGINS
+ $(Q)$(MAKE) $(build)=scripts/gcc-plugins
+endif
+ @:
diff --git a/scripts/gcc-plugin.sh b/scripts/gcc-plugin.sh
index fb920756..b65224b 100755
--- a/scripts/gcc-plugin.sh
+++ b/scripts/gcc-plugin.sh
@@ -1,5 +1,12 @@
#!/bin/sh
srctree=$(dirname "$0")
+
+SHOW_ERROR=
+if [ "$1" = "--show-error" ] ; then
+ SHOW_ERROR=1
+ shift || true
+fi
+
gccplugins_dir=$($3 -print-file-name=plugin)
plugincc=$($1 -E -x c++ - -o /dev/null -I"${srctree}"/gcc-plugins -I"${gccplugins_dir}"/include 2>&1 <<EOF
#include "gcc-common.h"
@@ -13,6 +20,9 @@
if [ $? -ne 0 ]
then
+ if [ -n "$SHOW_ERROR" ] ; then
+ echo "${plugincc}" >&2
+ fi
exit 1
fi
@@ -48,4 +58,8 @@
echo "$2"
exit 0
fi
+
+if [ -n "$SHOW_ERROR" ] ; then
+ echo "${plugincc}" >&2
+fi
exit 1
diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile
index 88c8ec4..8b29dc1 100644
--- a/scripts/gcc-plugins/Makefile
+++ b/scripts/gcc-plugins/Makefile
@@ -12,16 +12,18 @@
export HOST_EXTRACXXFLAGS
endif
-export GCCPLUGINS_DIR HOSTLIBS
-
ifneq ($(CFLAGS_KCOV), $(SANCOV_PLUGIN))
GCC_PLUGIN := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGIN))
endif
-$(HOSTLIBS)-y := $(GCC_PLUGIN)
+export HOSTLIBS
+
+$(HOSTLIBS)-y := $(foreach p,$(GCC_PLUGIN),$(if $(findstring /,$(p)),,$(p)))
always := $($(HOSTLIBS)-y)
-cyc_complexity_plugin-objs := cyc_complexity_plugin.o
-sancov_plugin-objs := sancov_plugin.o
+$(foreach p,$($(HOSTLIBS)-y:%.so=%),$(eval $(p)-objs := $(p).o))
+
+subdir-y := $(GCC_PLUGIN_SUBDIR)
+subdir- += $(GCC_PLUGIN_SUBDIR)
clean-files += *.so
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 122fcda..49a00d5 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -432,7 +432,7 @@
die "$P: file '${file}' not found\n";
}
}
- if ($from_filename || vcs_file_exists($file)) {
+ if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
$file =~ s/^\Q${cur_path}\E//; #strip any absolute path
$file =~ s/^\Q${lk_path}\E//; #or the path to the lk tree
push(@files, $file);
diff --git a/security/Kconfig b/security/Kconfig
index 176758c..df28f2b 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -118,6 +118,34 @@
this low address space will need the permission specific to the
systems running LSM.
+config HAVE_HARDENED_USERCOPY_ALLOCATOR
+ bool
+ help
+ The heap allocator implements __check_heap_object() for
+ validating memory ranges against heap object sizes in
+ support of CONFIG_HARDENED_USERCOPY.
+
+config HAVE_ARCH_HARDENED_USERCOPY
+ bool
+ help
+ The architecture supports CONFIG_HARDENED_USERCOPY by
+ calling check_object_size() just before performing the
+ userspace copies in the low level implementation of
+ copy_to_user() and copy_from_user().
+
+config HARDENED_USERCOPY
+ bool "Harden memory copies between kernel and userspace"
+ depends on HAVE_ARCH_HARDENED_USERCOPY
+ select BUG
+ help
+ This option checks for obviously wrong memory regions when
+ copying memory to/from the kernel (via copy_to_user() and
+ copy_from_user() functions) by rejecting memory ranges that
+ are larger than the specified heap object, span multiple
+ separately allocated pages, are not on the process stack,
+ or are part of the kernel text. This kills entire classes
+ of heap overflow exploits and similar kernel memory exposures.
+
source security/selinux/Kconfig
source security/smack/Kconfig
source security/tomoyo/Kconfig
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 89dacf9..160c7f7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -906,20 +906,23 @@
struct snd_card *card = dev_get_drvdata(dev);
struct azx *chip;
struct hda_intel *hda;
+ struct hdac_bus *bus;
if (!card)
return 0;
chip = card->private_data;
hda = container_of(chip, struct hda_intel, chip);
+ bus = azx_bus(chip);
if (chip->disabled || hda->init_failed || !chip->running)
return 0;
- if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
- && hda->need_i915_power) {
- snd_hdac_display_power(azx_bus(chip), true);
- snd_hdac_i915_set_bclk(azx_bus(chip));
+ if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+ snd_hdac_display_power(bus, true);
+ if (hda->need_i915_power)
+ snd_hdac_i915_set_bclk(bus);
}
+
if (chip->msi)
if (pci_enable_msi(pci) < 0)
chip->msi = 0;
@@ -929,6 +932,11 @@
hda_intel_init_chip(chip, true);
+ /* power down again for link-controlled chips */
+ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+ !hda->need_i915_power)
+ snd_hdac_display_power(bus, false);
+
snd_power_change_state(card, SNDRV_CTL_POWER_D0);
trace_azx_resume(chip);
@@ -1008,6 +1016,7 @@
chip = card->private_data;
hda = container_of(chip, struct hda_intel, chip);
+ bus = azx_bus(chip);
if (chip->disabled || hda->init_failed)
return 0;
@@ -1015,15 +1024,9 @@
return 0;
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
- bus = azx_bus(chip);
- if (hda->need_i915_power) {
- snd_hdac_display_power(bus, true);
+ snd_hdac_display_power(bus, true);
+ if (hda->need_i915_power)
snd_hdac_i915_set_bclk(bus);
- } else {
- /* toggle codec wakeup bit for STATESTS read */
- snd_hdac_set_codec_wakeup(bus, true);
- snd_hdac_set_codec_wakeup(bus, false);
- }
}
/* Read STATESTS before controller reset */
@@ -1043,6 +1046,11 @@
azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
~STATESTS_INT_MASK);
+ /* power down again for link-controlled chips */
+ if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+ !hda->need_i915_power)
+ snd_hdac_display_power(bus, false);
+
trace_azx_runtime_resume(chip);
return 0;
}
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 6adde45..6cf1f35 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1128,6 +1128,7 @@
{
/* devices which do not support reading the sample rate. */
switch (chip->usb_id) {
+ case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema */
case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
@@ -1138,6 +1139,7 @@
case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 4a41348..92a8308 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
@@ -301,10 +300,6 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
@@ -312,5 +307,7 @@
*/
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-
+#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 911e935..85599ad 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,5 +56,7 @@
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 4916144..fac9a5c 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -99,5 +99,7 @@
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 0
+#define REQUIRED_MASK17 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 5b15d94..37fee27 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
-#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }, \
- { EXIT_REASON_PCOMMIT, "PCOMMIT" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 406459b..da218fe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
};
enum bpf_prog_type {
@@ -93,6 +94,7 @@
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
};
#define BPF_PSEUDO_MAP_FD 1
@@ -313,6 +315,66 @@
*/
BPF_FUNC_skb_get_tunnel_opt,
BPF_FUNC_skb_set_tunnel_opt,
+
+ /**
+ * bpf_skb_change_proto(skb, proto, flags)
+ * Change protocol of the skb. Currently supported is
+ * v4 -> v6, v6 -> v4 transitions. The helper will also
+ * resize the skb. eBPF program is expected to fill the
+ * new headers via skb_store_bytes and lX_csum_replace.
+ * @skb: pointer to skb
+ * @proto: new skb->protocol type
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_skb_change_proto,
+
+ /**
+ * bpf_skb_change_type(skb, type)
+ * Change packet type of skb.
+ * @skb: pointer to skb
+ * @type: new skb->pkt_type type
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_skb_change_type,
+
+ /**
+ * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ */
+ BPF_FUNC_skb_in_cgroup,
+
+ /**
+ * bpf_get_hash_recalc(skb)
+ * Retrieve and possibly recalculate skb->hash.
+ * @skb: pointer to skb
+ * Return: hash
+ */
+ BPF_FUNC_get_hash_recalc,
+
+ /**
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ */
+ BPF_FUNC_get_current_task,
+
+ /**
+ * bpf_probe_write_user(void *dst, void *src, int len)
+ * safely attempt to write to a location
+ * @dst: destination address in userspace
+ * @src: source address on stack
+ * @len: number of bytes to copy
+ * Return: 0 on success or negative error
+ */
+ BPF_FUNC_probe_write_user,
+
__BPF_FUNC_MAX_ID,
};
@@ -347,9 +409,11 @@
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@
__u32 tunnel_label;
};
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 736da44..b303bcd 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -176,10 +176,18 @@
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported (see TYPES for details).
On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. The prefixes 's' and 'u' mean that the type is signed or unsigned, respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only the signedness and leave the size to be auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
LINE SYNTAX
-----------
Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1f6c705..053bbbd 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, brstack, brstacksym, flags.
- Field list can be prepended with the type, trace, sw or hw,
+ srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+ callindent. Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index c6d0f91..8d4dc97 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -54,10 +54,6 @@
#endif
#if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
- return true;
-}
#ifdef HAVE_LIBELF_SUPPORT
void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,27 @@
tev->point.offset += lep_offset;
}
}
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs)
+{
+ struct probe_trace_event *tev;
+ struct map *map;
+ struct symbol *sym = NULL;
+ struct rb_node *tmp;
+ int i = 0;
+
+ map = get_target_map(pev->target, pev->uprobes);
+ if (!map || map__load(map, NULL) < 0)
+ return;
+
+ for (i = 0; i < ntevs; i++) {
+ tev = &pev->tevs[i];
+ map__for_each_symbol(map, sym, tmp) {
+ if (map->unmap_ip(map, sym->start) == tev->point.address)
+ arch__fix_tev_from_maps(pev, tev, map, sym);
+ }
+ }
+}
+
#endif
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 971ff91..9c640a8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2116,7 +2116,7 @@
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
- "callindent", parse_output_fields),
+ "bpf-output,callindent", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0c16d20..3c7452b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -331,7 +331,7 @@
return 0;
}
-static void read_counters(bool close_counters)
+static void read_counters(void)
{
struct perf_evsel *counter;
@@ -341,11 +341,6 @@
if (perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
-
- if (close_counters) {
- perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
- thread_map__nr(evsel_list->threads));
- }
}
}
@@ -353,7 +348,7 @@
{
struct timespec ts, rs;
- read_counters(false);
+ read_counters();
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@
perf_evlist__enable(evsel_list);
}
+static void disable_counters(void)
+{
+ /*
+ * If we don't have tracee (attaching to task or cpu), counters may
+ * still be running. To get accurate group ratios, we must stop groups
+ * from counting before reading their constituent counters.
+ */
+ if (!target__none(&target))
+ perf_evlist__disable(evsel_list);
+}
+
static volatile int workload_exec_errno;
/*
@@ -657,11 +663,20 @@
}
}
+ disable_counters();
+
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
- read_counters(true);
+ /*
+ * Closing a group leader splits the group, and as we only disable
+ * group leaders, results in remaining events becoming enabled. To
+ * avoid arbitrary skew, we must read all counters before closing any
+ * group leaders.
+ */
+ read_counters();
+ perf_evlist__close(evsel_list);
return WEXITSTATUS(status);
}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 953dc1a..2873396 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -170,15 +170,17 @@
module = "kernel";
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ /* short_name is "[module]" */
if (strncmp(pos->dso->short_name + 1, module,
- pos->dso->short_name_len - 2) == 0) {
+ pos->dso->short_name_len - 2) == 0 &&
+ module[pos->dso->short_name_len - 2] == '\0') {
return pos;
}
}
return NULL;
}
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
{
/* Init maps of given executable or kernel */
if (user)
@@ -385,7 +387,7 @@
if (uprobes)
address = sym->start;
else
- address = map->unmap_ip(map, sym->start);
+ address = map->unmap_ip(map, sym->start) - map->reloc;
break;
}
if (!address) {
@@ -664,22 +666,14 @@
return ret;
}
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
- int ntevs, const char *module,
- bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs)
{
struct ref_reloc_sym *reloc_sym;
char *tmp;
int i, skipped = 0;
- if (uprobe)
- return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
- /* Note that currently ref_reloc_sym based probe is not for drivers */
- if (module)
- return add_module_to_probe_trace_events(tevs, ntevs, module);
-
reloc_sym = kernel_get_ref_reloc_sym();
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@
return skipped;
}
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+ int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ bool uprobe)
+{
+ int ret;
+
+ if (uprobe)
+ ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+ else if (module)
+ /* Currently ref_reloc_sym based probe is not for drivers */
+ ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+ else
+ ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+ if (ret >= 0)
+ arch__post_process_probe_trace_events(pev, ntevs);
+
+ return ret;
+}
+
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@
if (ntevs > 0) { /* Succeeded to find trace events */
pr_debug("Found %d probe_trace_events.\n", ntevs);
- ret = post_process_probe_trace_events(*tevs, ntevs,
+ ret = post_process_probe_trace_events(pev, *tevs, ntevs,
pev->target, pev->uprobes);
if (ret < 0 || ret == ntevs) {
clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@
return err;
}
-bool __weak arch__prefers_symtab(void) { return false; }
-
/* Concatinate two arrays */
static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
{
@@ -3158,12 +3178,6 @@
if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */
return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
- if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
- ret = find_probe_trace_events_from_map(pev, tevs);
- if (ret > 0)
- return ret; /* Found in symbol table */
- }
-
/* Convert perf_probe_event with debuginfo */
ret = try_to_find_probe_trace_events(pev, tevs);
if (ret != 0)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e18ea9f..f4f45db 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -158,7 +158,6 @@
int show_available_vars(struct perf_probe_event *pevs, int npevs,
struct strfilter *filter);
int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
struct probe_trace_event *tev, struct map *map,
struct symbol *sym);
@@ -173,4 +172,9 @@
int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
struct perf_probe_arg *pvar);
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs);
+
#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff0..5c290c6 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -297,10 +297,13 @@
char sbuf[STRERR_BUFSIZE];
int bsize, boffs, total;
int ret;
+ char sign;
/* TODO: check all types */
- if (cast && strcmp(cast, "string") != 0) {
+ if (cast && strcmp(cast, "string") != 0 &&
+ strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
/* Non string type is OK */
+ /* and respect signedness cast */
tvar->type = strdup(cast);
return (tvar->type == NULL) ? -ENOMEM : 0;
}
@@ -361,6 +364,13 @@
return (tvar->type == NULL) ? -ENOMEM : 0;
}
+ if (cast && (strcmp(cast, "u") == 0))
+ sign = 'u';
+ else if (cast && (strcmp(cast, "s") == 0))
+ sign = 's';
+ else
+ sign = die_is_signed_type(&type) ? 's' : 'u';
+
ret = dwarf_bytesize(&type);
if (ret <= 0)
/* No size ... try to use default type */
@@ -373,8 +383,7 @@
dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
ret = MAX_BASIC_TYPE_BITS;
}
- ret = snprintf(buf, 16, "%c%d",
- die_is_signed_type(&type) ? 's' : 'u', ret);
+ ret = snprintf(buf, 16, "%c%d", sign, ret);
formatted:
if (ret < 0 || ret >= 16) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 947d21f..3d3cb83 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -588,7 +588,11 @@
} else {
pevent_event_info(&seq, evsel->tp_format, &rec);
}
- return seq.buffer;
+ /*
+ * Trim the buffer, it starts at 4KB and we're not going to
+ * add anything more to this buffer.
+ */
+ return realloc(seq.buffer, seq.len + 1);
}
static int64_t
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 5404efa..dd48f42 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -13,6 +13,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
#include <linux/libnvdimm.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
@@ -1474,6 +1475,7 @@
if (nfit_test->setup != nfit_test0_setup)
return 0;
+ flush_work(&acpi_desc->work);
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 3c40c9d..1cc6d64 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,7 +8,7 @@
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
export CFLAGS
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 4f93af8..18601f6 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -14,4 +14,20 @@
DMA_NONE = 3,
};
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+ void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+ *(hp) = (unsigned long)__dma_alloc_coherent_p; \
+ __dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 0338499..d9554fc 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -20,7 +20,9 @@
#define PAGE_SIZE getpagesize()
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
+typedef unsigned long long phys_addr_t;
typedef unsigned long long dma_addr_t;
typedef size_t __kernel_size_t;
typedef unsigned int __wsum;
@@ -57,6 +59,11 @@
return p;
}
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+ return kmalloc(s, gfp);
+}
+
static inline void kfree(void *p)
{
if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
@@ -64,6 +71,11 @@
free(p);
}
+static inline void free_pages_exact(void *p, size_t s)
+{
+ kfree(p);
+}
+
static inline void *krealloc(void *p, size_t s, gfp_t gfp)
{
return realloc(p, s);
@@ -105,6 +117,8 @@
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+
#define min(x, y) ({ \
typeof(x) _min1 = (x); \
typeof(y) _min2 = (y); \
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
index 81baeac..7e1c119 100644
--- a/tools/virtio/linux/slab.h
+++ b/tools/virtio/linux/slab.h
@@ -1,2 +1,6 @@
#ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
#endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index ee125e7..9377c8b 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,8 +3,12 @@
#include <linux/scatterlist.h>
#include <linux/kernel.h>
+struct device {
+ void *parent;
+};
+
struct virtio_device {
- void *dev;
+ struct device dev;
u64 features;
};
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 57a6964..9ba1181 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -40,6 +40,19 @@
#define virtio_has_feature(dev, feature) \
(__virtio_test_bit((dev), feature))
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+ /*
+ * Note the reverse polarity of the quirk feature (compared to most
+ * other features), this is for compatibility with legacy systems.
+ */
+ return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
{
return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 68e4f9f..bd2ad1d 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -13,6 +13,7 @@
#define cache_line_size() SMP_CACHE_BYTES
#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
#define unlikely(x) (__builtin_expect(!!(x), 0))
+#define likely(x) (__builtin_expect(!!(x), 1))
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
typedef pthread_spinlock_t spinlock_t;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4fde8c7..77e6ccf 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -33,6 +33,7 @@
static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
@@ -365,7 +366,7 @@
static void kvm_timer_init_interrupt(void *info)
{
- enable_percpu_irq(host_vtimer_irq, 0);
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +433,14 @@
}
host_vtimer_irq = info->virtual_irq;
+ host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+ if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+ host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+ kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+ host_vtimer_irq);
+ host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+ }
+
err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest timer", kvm_get_running_vcpus());
if (err) {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index fb4b0a7..83777c1 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -73,12 +73,8 @@
int i, vcpu_lock_idx = -1, ret;
struct kvm_vcpu *vcpu;
- mutex_lock(&kvm->lock);
-
- if (irqchip_in_kernel(kvm)) {
- ret = -EEXIST;
- goto out;
- }
+ if (irqchip_in_kernel(kvm))
+ return -EEXIST;
/*
* This function is also called by the KVM_CREATE_IRQCHIP handler,
@@ -87,10 +83,8 @@
* the proper checks already.
*/
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
- !kvm_vgic_global_state.can_emulate_gicv2) {
- ret = -ENODEV;
- goto out;
- }
+ !kvm_vgic_global_state.can_emulate_gicv2)
+ return -ENODEV;
/*
* Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -138,9 +132,6 @@
vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
mutex_unlock(&vcpu->mutex);
}
-
-out:
- mutex_unlock(&kvm->lock);
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 07411cf..4660a7d 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -51,7 +51,7 @@
irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
if (!irq)
- return NULL;
+ return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&irq->lpi_list);
INIT_LIST_HEAD(&irq->ap_list);
@@ -441,39 +441,63 @@
* Find the target VCPU and the LPI number for a given devid/eventid pair
* and make this IRQ pending, possibly injecting it.
* Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
*/
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
- u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid)
{
+ struct kvm_vcpu *vcpu;
struct its_itte *itte;
if (!its->enabled)
- return;
+ return -EBUSY;
itte = find_itte(its, devid, eventid);
- /* Triggering an unmapped IRQ gets silently dropped. */
- if (itte && its_is_collection_mapped(itte->collection)) {
- struct kvm_vcpu *vcpu;
+ if (!itte || !its_is_collection_mapped(itte->collection))
+ return E_ITS_INT_UNMAPPED_INTERRUPT;
- vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
- if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
- spin_lock(&itte->irq->irq_lock);
- itte->irq->pending = true;
- vgic_queue_irq_unlock(kvm, itte->irq);
- }
- }
+ vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+ if (!vcpu)
+ return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+ if (!vcpu->arch.vgic_cpu.lpis_enabled)
+ return -EBUSY;
+
+ spin_lock(&itte->irq->irq_lock);
+ itte->irq->pending = true;
+ vgic_queue_irq_unlock(kvm, itte->irq);
+
+ return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+ struct vgic_io_device *iodev;
+
+ if (dev->ops != &kvm_io_gic_ops)
+ return NULL;
+
+ iodev = container_of(dev, struct vgic_io_device, dev);
+
+ if (iodev->iodev_type != IODEV_ITS)
+ return NULL;
+
+ return iodev;
}
/*
* Queries the KVM IO bus framework to get the ITS pointer from the given
* doorbell address.
* We then call vgic_its_trigger_msi() with the decoded data.
+ * Returns 1 on success, as required by the KVM_SIGNAL_MSI API description.
*/
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;
+ int ret;
if (!vgic_has_its(kvm))
return -ENODEV;
@@ -485,15 +509,28 @@
kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
if (!kvm_io_dev)
- return -ENODEV;
+ return -EINVAL;
- iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+ iodev = vgic_get_its_iodev(kvm_io_dev);
+ if (!iodev)
+ return -EINVAL;
mutex_lock(&iodev->its->its_lock);
- vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+ ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
mutex_unlock(&iodev->its->its_lock);
- return 0;
+ if (ret < 0)
+ return ret;
+
+ /*
+ * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+ * if the guest has blocked the MSI. So we map any LPI mapping
+ * related error to that.
+ */
+ if (ret)
+ return 0;
+ else
+ return 1;
}
/* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@
list_del(&itte->itte_list);
/* This put matches the get in vgic_add_lpi. */
- vgic_put_irq(kvm, itte->irq);
+ if (itte->irq)
+ vgic_put_irq(kvm, itte->irq);
kfree(itte);
}
@@ -697,6 +735,7 @@
struct its_device *device;
struct its_collection *collection, *new_coll = NULL;
int lpi_nr;
+ struct vgic_irq *irq;
device = find_its_device(its, device_id);
if (!device)
@@ -710,6 +749,10 @@
lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
return E_ITS_MAPTI_PHYSICALID_OOR;
+ /* If there is an existing mapping, behavior is UNPREDICTABLE. */
+ if (find_itte(its, device_id, event_id))
+ return 0;
+
collection = find_collection(its, coll_id);
if (!collection) {
int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@
new_coll = collection;
}
- itte = find_itte(its, device_id, event_id);
+ itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
if (!itte) {
- itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
- if (!itte) {
- if (new_coll)
- vgic_its_free_collection(its, coll_id);
- return -ENOMEM;
- }
-
- itte->event_id = event_id;
- list_add_tail(&itte->itte_list, &device->itt_head);
+ if (new_coll)
+ vgic_its_free_collection(its, coll_id);
+ return -ENOMEM;
}
+ itte->event_id = event_id;
+ list_add_tail(&itte->itte_list, &device->itt_head);
+
itte->collection = collection;
itte->lpi = lpi_nr;
- itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+ irq = vgic_add_lpi(kvm, lpi_nr);
+ if (IS_ERR(irq)) {
+ if (new_coll)
+ vgic_its_free_collection(its, coll_id);
+ its_free_itte(kvm, itte);
+ return PTR_ERR(irq);
+ }
+ itte->irq = irq;
+
update_affinity_itte(kvm, itte);
/*
@@ -981,9 +1030,7 @@
u32 msi_data = its_cmd_get_id(its_cmd);
u64 msi_devid = its_cmd_get_deviceid(its_cmd);
- vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
- return 0;
+ return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
}
/*
@@ -1288,13 +1335,13 @@
its_sync_lpi_pending_table(vcpu);
}
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
{
struct vgic_io_device *iodev = &its->iodev;
int ret;
- if (its->initialized)
- return 0;
+ if (!its->initialized)
+ return -EBUSY;
if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
return -ENXIO;
@@ -1311,9 +1358,6 @@
KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
mutex_unlock(&kvm->slots_lock);
- if (!ret)
- its->initialized = true;
-
return ret;
}
@@ -1435,9 +1479,6 @@
if (type != KVM_VGIC_ITS_ADDR_TYPE)
return -ENODEV;
- if (its->initialized)
- return -EBUSY;
-
if (copy_from_user(&addr, uaddr, sizeof(addr)))
return -EFAULT;
@@ -1453,7 +1494,9 @@
case KVM_DEV_ARM_VGIC_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
- return vgic_its_init_its(dev->kvm, its);
+ its->initialized = true;
+
+ return 0;
}
break;
}
@@ -1498,3 +1541,30 @@
return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
KVM_DEV_TYPE_ARM_VGIC_ITS);
}
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+ struct kvm_device *dev;
+ int ret = 0;
+
+ list_for_each_entry(dev, &kvm->devices, vm_node) {
+ if (dev->ops != &kvm_arm_vgic_its_ops)
+ continue;
+
+ ret = vgic_register_its_iodev(kvm, dev->private);
+ if (ret)
+ return ret;
+ /*
+ * We don't need to care about tearing down previously
+ * registered ITSes, as the kvm_io_bus framework removes
+ * them for us if the VM gets destroyed.
+ */
+ }
+
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index ff668e0..90d8181 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -306,16 +306,19 @@
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 propbaser = dist->propbaser;
+ u64 old_propbaser, propbaser;
/* Storing a value with LPIs already enabled is undefined */
if (vgic_cpu->lpis_enabled)
return;
- propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
- propbaser = vgic_sanitise_propbaser(propbaser);
-
- dist->propbaser = propbaser;
+ do {
+ old_propbaser = dist->propbaser;
+ propbaser = old_propbaser;
+ propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+ propbaser = vgic_sanitise_propbaser(propbaser);
+ } while (cmpxchg64(&dist->propbaser, old_propbaser,
+ propbaser) != old_propbaser);
}
static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@
unsigned long val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u64 pendbaser = vgic_cpu->pendbaser;
+ u64 old_pendbaser, pendbaser;
/* Storing a value with LPIs already enabled is undefined */
if (vgic_cpu->lpis_enabled)
return;
- pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
- pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
- vgic_cpu->pendbaser = pendbaser;
+ do {
+ old_pendbaser = vgic_cpu->pendbaser;
+ pendbaser = old_pendbaser;
+ pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+ pendbaser = vgic_sanitise_pendbaser(pendbaser);
+ } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+ pendbaser) != old_pendbaser);
}
/*
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 0506543..9f0dae3 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -289,6 +289,14 @@
goto out;
}
+ if (vgic_has_its(kvm)) {
+ ret = vgic_register_its_iodevs(kvm);
+ if (ret) {
+ kvm_err("Unable to register VGIC ITS MMIO regions\n");
+ goto out;
+ }
+ }
+
dist->ready = true;
out:
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index e7aeac7..e83b7fe 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -117,17 +117,17 @@
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
- struct vgic_dist *dist;
+ struct vgic_dist *dist = &kvm->arch.vgic;
if (irq->intid < VGIC_MIN_LPI)
return;
- if (!kref_put(&irq->refcount, vgic_irq_release))
- return;
-
- dist = &kvm->arch.vgic;
-
spin_lock(&dist->lpi_list_lock);
+ if (!kref_put(&irq->refcount, vgic_irq_release)) {
+ spin_unlock(&dist->lpi_list_lock);
+ return;
+ }
+
list_del(&irq->lpi_list);
dist->lpi_list_count--;
spin_unlock(&dist->lpi_list_lock);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 1d8e21d..6c4625c 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -84,6 +84,7 @@
int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@
return -ENODEV;
}
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+ return -ENODEV;
+}
+
static inline bool vgic_has_its(struct kvm *kvm)
{
return false;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cc081cc..1950782 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -696,6 +696,11 @@
{
struct kvm_device *dev, *tmp;
+ /*
+ * We do not need to take the kvm->lock here, because nobody else
+ * has a reference to the struct kvm at this point and therefore
+ * cannot access the devices list anyhow.
+ */
list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
list_del(&dev->vm_node);
dev->ops->destroy(dev);
@@ -2832,19 +2837,28 @@
dev->ops = ops;
dev->kvm = kvm;
+ mutex_lock(&kvm->lock);
ret = ops->create(dev, cd->type);
if (ret < 0) {
+ mutex_unlock(&kvm->lock);
kfree(dev);
return ret;
}
+ list_add(&dev->vm_node, &kvm->devices);
+ mutex_unlock(&kvm->lock);
+
+ if (ops->init)
+ ops->init(dev);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
+ mutex_lock(&kvm->lock);
+ list_del(&dev->vm_node); /* unlink before ->destroy(), which frees dev */
+ mutex_unlock(&kvm->lock);
ops->destroy(dev);
return ret;
}
- list_add(&dev->vm_node, &kvm->devices);
kvm_get_kvm(kvm);
cd->fd = ret;
return 0;